From 7cd4fa74f6b6da7a517d7023118560ee272f8a46 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 9 Oct 2019 13:37:18 +0200 Subject: [PATCH 01/19] ENH: YAML config of vector studies --- work/plasmids/analysis.py | 39 ++ work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml | 24 + work/plasmids/main.py | 66 +++ work/plasmids/utils.py | 10 +- work/plasmids/vector.py | 443 +++++++++---------- 5 files changed, 346 insertions(+), 236 deletions(-) create mode 100644 work/plasmids/analysis.py create mode 100644 work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml create mode 100644 work/plasmids/main.py diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py new file mode 100644 index 0000000..a16fb53 --- /dev/null +++ b/work/plasmids/analysis.py @@ -0,0 +1,39 @@ +from etfl.optim.utils import check_solution +from etfl.optim.variables import mRNAVariable, EnzymeVariable + + +def chebyshev(model, chebyshev_include, inplace=True, big_m=1000): + from etfl.optim.variables import mRNAVariable,EnzymeVariable + from etfl.optim.constraints import ForwardCatalyticConstraint,BackwardCatalyticConstraint + from etfl.analysis.dynamic import chebyshev_center, compute_center + + chebyshev_variables = model.get_variables_of_type(mRNAVariable) + chebyshev_variables += model.get_variables_of_type(EnzymeVariable) + + chebyshev_center(model, chebyshev_variables, + inplace=inplace, + big_m=big_m, + include=chebyshev_include) + + # Revert to the previous objective, as chebyshev_center sets it to maximize + # the radius + model.objective = the_obj + + # We want to compute the center under the constraint of the growth at the + # initial solution. + chebyshev_sol = compute_center(model, provided_solution=initial_solution) + print_standard_sol(model) + + +def export_mrna(model, tag, solution=None): + solution = check_solution(solution) + + mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('id') + + data = solution.raw + mrna_data = data[mrna_var_ids] + mrna_data.name = tag + + filename = 'outputs/' + tag + 'mrna' + '.csv' + + mrna_data.to_csv(filename, header=True) \ No newline at end of file diff --git a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml new file mode 100644 index 0000000..aab6683 --- /dev/null +++ b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml @@ -0,0 +1,24 @@ +model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191007_142013.json' +tag: 'vEFL_BDO_1c_RNAP' +simulation: + copy_number: 1 + vector: 'plasmid_pET-AR-ALS' + add_rnap: yes +assumptions: + growth_rate: 'auto' +analysis: + # Export values ? + export_peptides: yes + export_mrna: yes + export_fluxes: yes + # Perform Chebyshev centering ? + chebyshev: + chebyshev_include: + - ForwardCatalyticConstraint + - BackwardCatalyticConstraint + - RNAPAllocation + - ExpressionCoupling + inplace: false +options: + verbose: false + inplace: true diff --git a/work/plasmids/main.py b/work/plasmids/main.py new file mode 100644 index 0000000..aa31561 --- /dev/null +++ b/work/plasmids/main.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +""" +.. module:: ETFL + :platform: Unix, Windows + :synopsis: Models vectors in ETFL models + +.. moduleauthor:: ETFL team + +Model RNAP limitation in case of vector addition to a host +""" + +from etfl.io.json import load_json_model +from etfl.optim.config import standard_solver_config +from etfl.analysis.summary import print_standard_sol +from etfl.core.vector import TransModel +import yaml +import sys + +from utils import read_config +from vector import get_bdo_plasmid + +if len(sys.argv)<=1: + raise ValueError('Please provide a YAML configuration file') +else: + CONFIG = sys.argv[1] + +print('Using configuration file: {}'.format(CONFIG)) + +vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid} + +if __name__ == '__main__': + config = read_config(CONFIG) + + if config['model'] != 'debug': + model = load_json_model(config['model']) + else: + from etfl.tests.small_model import create_etfl_model + model = create_etfl_model(0,1) + + standard_solver_config(model) + model.solver.configuration.verbosity = config['options']['verbose'] + + copy_number = config['simulation']['copy_number'] + vector_generator = vec_dict[config['simulation']['vector']] + has_rnap = config['simulation']['add_rnap'] + + my_plasmid = vector_generator(model, has_rnap) + + + ##################### + # Model integration # + ##################### + + transmodel = TransModel(model, inplace = config['options']['inplace']) + transmodel.add_vector(my_plasmid, copy_number = copy_number) + + transmodel.optimize() + print_standard_sol(transmodel) + + for analysis,arguments in config['analysis']: + if arguments != False: + arguments['model'] = transmodel + arguments['tag'] = config['tag'] + analysis(**arguments) + + # Check antibiotic resistance is expressed ? \ No newline at end of file diff --git a/work/plasmids/utils.py b/work/plasmids/utils.py index df40aef..625596f 100644 --- a/work/plasmids/utils.py +++ b/work/plasmids/utils.py @@ -9,6 +9,7 @@ Utility functions """ +import yaml def read_seq(filename): with open(filename,'r') as fid: @@ -17,4 +18,11 @@ def read_seq(filename): seq = ''.join([x for line in all_lines for x in line.split() if not x.isdigit()]) - return seq \ No newline at end of file + return seq + +def read_config(yaml_file): + with open(yaml_file, 'rb') as f: + conf = yaml.load(f.read(), Loader=yaml.SafeLoader) # load the config file + return conf + + diff --git a/work/plasmids/vector.py b/work/plasmids/vector.py index 6f65b29..70fa388 100644 --- a/work/plasmids/vector.py +++ b/work/plasmids/vector.py @@ -1,242 +1,215 @@ -# -*- coding: utf-8 -*- -""" -.. module:: ETFL - :platform: Unix, Windows - :synopsis: Models vectors in ETFL models - -.. moduleauthor:: ETFL team - -Model RNAP limitation in case of vector addition to a host -""" - -from etfl.io.json import load_json_model -from etfl.core.vector import TransModel, Plasmid +from etfl.data.ecoli import kdeg_enz, kdeg_mrna, get_average_kcat, get_rnap, \ + get_nt_sequences from etfl.core.genes import ExpressedGene from etfl.core.rna import mRNA from etfl.core.enzyme import Enzyme -from etfl.tests.small_model import create_etfl_model -from etfl.data.ecoli import kdeg_enz, kdeg_mrna, get_average_kcat, get_rnap, \ - get_nt_sequences -from etfl.analysis.summary import print_standard_sol +from etfl.core.vector import TransModel, Plasmid + +from utils import read_seq from cobra.core import Metabolite, Reaction -from utils import read_seq -# E. coli - -model = load_json_model('../../tutorials/models/' - 'iJO1366_vEFL_v_0.10_431_enz_128_bins__20191003_100718.json') - -# model = create_etfl_model(0,1) -# model = create_etfl_model(0,0) - -# Plasmid pZS*-13S-ald-adh -# US Patent 20,140,371,417. -# Used in Andreozzi, Stefano, et al. -# "Identification of metabolic engineering targets for the enhancement of -# 1,4-butanediol production in recombinant E. coli using large-scale -# kinetic models." -# Metabolic engineering 35 (2016): 148-159. - -# (too complicated) - -# 2-gene BDO plasmid -# Reshamwala, Shamlan MS, Shalini S. Deb, and Arvind M. Lali. -# "A shortened, two-enzyme pathway for 2, 3-butanediol production in -# Escherichia coli." -# Journal of industrial microbiology & biotechnology 44.9 (2017): 1273-1277. - -############### -# Metabolites # -############### -acetoin = Metabolite(id='acetoin_c', name = 'Acetoin, cytosol', - formula = 'C4H8O2', compartment='c') -diacetyl = Metabolite(id='diacetyl_c', name = 'Diacetyl, cytosol', - formula = 'C4H6O2', compartment='c') -bdo = Metabolite(id='bdo_c', name='Meso-2,3-butanediol, cytosol', - formula = 'C4H10O2', compartment='c') -bdo_e = Metabolite(id='bdo_e', name='Meso-2,3-butanediol, extracellular', - formula = 'C4H10O2', compartment='e') -acetolactate= model.metabolites.alac__S_c -nadh = model.metabolites.nadh_c -nad = model.metabolites.nad_c -pyr = model.metabolites.pyr_c -co2 = model.metabolites.co2_c -h = model.metabolites.h_c -############# -# Reactions # -############# - -# ALS: 2 pyr + h => alac + co2 -als = Reaction(id = 'ALS', name = 'Acetolactate synthase [Plasmid](Enterobacter)') -als.add_metabolites({ - pyr:-2, - h:-1, - acetolactate:1, - co2:1 -}) - -# SALD: alac => diacetyl + co2 (spontaneous) -sald = Reaction(id = 'SALD', name = 'Spontaneous acetolactate decarboxylation [Plasmid]') -sald.add_metabolites({ - acetolactate:-1, - diacetyl:1, - co2:1 -}) - -# AR: diacetyl + NADH => acetoin + NAD+############# - -ar = Reaction(id = 'AR', name = 'Acetoin oxidoreductase (NADH) [Plasmid]') -ar.add_metabolites({ - diacetyl:-1, - nadh:-1, - h:-1, - acetoin:1, - nad:1 -}) - -# BDH: acetoin + NADH => bdo + NAD+ -bdh = Reaction(id = 'BDH', name = 'Butanediol dehydrogenase [Plasmid]') -bdh.add_metabolites({ - acetoin:-1, - nadh:-1, - h:-1, - bdo:1, - nad:1 -}) - -# BDO Transport -bdo_tp = Reaction(id='BDOtp', name = 'BDO Transport (extracellular)') -bdo_tp.add_metabolites({ - bdo:-1, - bdo_e:1 -}) - -# BDO Exchange -bdo_ex = Reaction(id='EX_BDO', name = 'BDO Exchange') -bdo_ex.add_metabolites({ - bdo_e:-1, -}) - -reaction_list = [als, sald, ar, bdh, bdo_tp, bdo_ex] - -############# -# Genes # -############# - -ALS = ExpressedGene(id = 'EBA_als', - name = 'acetolactate synthase', - sequence = read_seq('data/ALS.txt')) -AR = ExpressedGene (id = 'EBA_ar', - name = 'acetoin reductase', - sequence = read_seq('data/AR.txt')) -fwd_als= 'GGAATTCCATATGAACAGTGAGAAACAGTC' -rev_als= 'CCGAGCTCTTACAAAATCTGGCTGAGAT' -fwd_ar = 'GGAATTCCATATGCAAAAAGTTGCTCTCGTAAC' -rev_ar = 'CCGAGCTCTTAGTTGAACACCATCCCAC' - -pET = read_seq('data/pET.txt') - -############# -# Enzymes # -############# - -# ALS is 60 kDa, and is a homotrimer -# Kaushal, Anju, Sunil Pabbi, and Prince Sharma. -# "Characterization of 2, 3-butanediol-forming and valine-sensitive -# α-acetolactate synthase of Enterobacter cloacae." -# World Journal of Microbiology and Biotechnology 19.5 (2003): 487-493. - -# ALS kcat is 26.9 +- 2.5[s-1] -# Choi, Myeong-Hyeon, et al. -# "Molecular cloning and expression of Enterobacter aerogenes α-acetolactate -# decarboxylase in pyruvate decarboxylase-deficient Saccharomyces cerevisiae -# for efficient 2, 3-butanediol production." -# Process biochemistry 51.2 (2016): 170-176. - -ALS_enz = Enzyme( - id = 'ALS', - kcat=26.9*3600, - kdeg=kdeg_enz, - composition = {'EBA_als':3} -) - -# The third enzyme in the 2,3-butanediol pathway, AR, purified from A.aerogenes -# consists of four equal-size sub-unitsof 25kDa(40). - -# Blomqvist, Kristina, et al. -# "Characterization of the genes of the 2, 3-butanediol operons from Klebsiella -# terrigena and Enterobacter aerogenes." -# Journal of bacteriology 175.5 (1993): 1392-1404. -# & -# Stormer, Fredrik C. -# "[106] 2, 3-Butanediol biosynthetic system in Aerobacter aerogenes." -# Methods in enzymology. Vol. 41. Academic Press, 1975. 518-533. - -AR_enz = Enzyme( - id = 'AR', - kcat=get_average_kcat(), - kdeg=kdeg_enz, - composition = {'EBA_ar':4} -) - -# Probably the catalytic activities are different -BDH_enz = Enzyme( - id = 'BDH', - kcat=get_average_kcat(), - kdeg=kdeg_enz, - composition = {'EBA_ar':4} -) - -############ -# Coupling # -############ - -coupling_dict = {'ALS':[ALS_enz], 'AR':[AR_enz], 'BDH':[BDH_enz]} - -# AR and BDH are catalyzed by the same enzyme - -########### -# Plasmid # -########### - -rnap = get_rnap() -rnap.id = 'plasmid_rnap' - -nt_seq_ecoli = get_nt_sequences() -rnap_genes = list() -for g in rnap.composition: - this_gene = ExpressedGene(id='PLASMID_'+g, - name='{}, Plasmid', - sequence = nt_seq_ecoli[g]) - rnap_genes.append(this_gene) - -gene_list = [ALS,AR] + rnap_genes - -plasmid_seq = pET \ - + fwd_als + ALS.sequence + rev_als \ - + fwd_ar + AR.sequence + rev_ar \ - + ''.join(str(g.sequence) for g in rnap_genes) - -my_plasmid = Plasmid(id_ = 'pET-AR-ALS', - sequence = plasmid_seq, - genes = gene_list, - reactions = reaction_list, - coupling_dict = coupling_dict, - rnap = rnap) -my_plasmid.build_default_mrna(kdeg_mrna) - -##################### -# Model integration # -##################### - -transmodel = TransModel(model, inplace = True) -transmodel.add_vector(my_plasmid, copy_number = 5) - - -# Check antibiotic resistance is expressed ? - - -transmodel.optimize() -print_standard_sol(transmodel) \ No newline at end of file +def get_bdo_plasmid(model, has_rnap=False): + + # Plasmid pZS*-13S-ald-adh + # US Patent 20,140,371,417. + # Used in Andreozzi, Stefano, et al. + # "Identification of metabolic engineering targets for the enhancement of + # 1,4-butanediol production in recombinant E. coli using large-scale + # kinetic models." + # Metabolic engineering 35 (2016): 148-159. + + # (too complicated) + + # 2-gene BDO plasmid + # Reshamwala, Shamlan MS, Shalini S. Deb, and Arvind M. Lali. + # "A shortened, two-enzyme pathway for 2, 3-butanediol production in + # Escherichia coli." + # Journal of industrial microbiology & biotechnology 44.9 (2017): 1273-1277. + + ############### + # Metabolites # + ############### + acetoin = Metabolite(id='acetoin_c', name = 'Acetoin, cytosol', + formula = 'C4H8O2', compartment='c') + diacetyl = Metabolite(id='diacetyl_c', name = 'Diacetyl, cytosol', + formula = 'C4H6O2', compartment='c') + bdo = Metabolite(id='bdo_c', name='Meso-2,3-butanediol, cytosol', + formula = 'C4H10O2', compartment='c') + bdo_e = Metabolite(id='bdo_e', name='Meso-2,3-butanediol, extracellular', + formula = 'C4H10O2', compartment='e') + acetolactate= model.metabolites.alac__S_c + nadh = model.metabolites.nadh_c + nad = model.metabolites.nad_c + pyr = model.metabolites.pyr_c + co2 = model.metabolites.co2_c + h = model.metabolites.h_c + ############# + # Reactions # + ############# + + # ALS: 2 pyr + h => alac + co2 + als = Reaction(id = 'ALS', name = 'Acetolactate synthase [Plasmid](Enterobacter)') + als.add_metabolites({ + pyr:-2, + h:-1, + acetolactate:1, + co2:1 + }) + + # SALD: alac => diacetyl + co2 (spontaneous) + sald = Reaction(id = 'SALD', name = 'Spontaneous acetolactate decarboxylation [Plasmid]') + sald.add_metabolites({ + acetolactate:-1, + diacetyl:1, + co2:1 + }) + + # AR: diacetyl + NADH => acetoin + NAD+############# + + ar = Reaction(id = 'AR', name = 'Acetoin oxidoreductase (NADH) [Plasmid]') + ar.add_metabolites({ + diacetyl:-1, + nadh:-1, + h:-1, + acetoin:1, + nad:1 + }) + + # BDH: acetoin + NADH => bdo + NAD+ + bdh = Reaction(id = 'BDH', name = 'Butanediol dehydrogenase [Plasmid]') + bdh.add_metabolites({ + acetoin:-1, + nadh:-1, + h:-1, + bdo:1, + nad:1 + }) + + # BDO Transport + bdo_tp = Reaction(id='BDOtp', name = 'BDO Transport (extracellular)') + bdo_tp.add_metabolites({ + bdo:-1, + bdo_e:1 + }) + + # BDO Exchange + bdo_ex = Reaction(id='EX_BDO', name = 'BDO Exchange') + bdo_ex.add_metabolites({ + bdo_e:-1, + }) + + reaction_list = [als, sald, ar, bdh, bdo_tp, bdo_ex] + + ############# + # Genes # + ############# + + ALS = ExpressedGene(id = 'EBA_als', + name = 'acetolactate synthase', + sequence = read_seq('data/ALS.txt')) + AR = ExpressedGene (id = 'EBA_ar', + name = 'acetoin reductase', + sequence = read_seq('data/AR.txt')) + fwd_als= 'GGAATTCCATATGAACAGTGAGAAACAGTC' + rev_als= 'CCGAGCTCTTACAAAATCTGGCTGAGAT' + fwd_ar = 'GGAATTCCATATGCAAAAAGTTGCTCTCGTAAC' + rev_ar = 'CCGAGCTCTTAGTTGAACACCATCCCAC' + + pET = read_seq('data/pET.txt') + + ############# + # Enzymes # + ############# + + # ALS is 60 kDa, and is a homotrimer + # Kaushal, Anju, Sunil Pabbi, and Prince Sharma. + # "Characterization of 2, 3-butanediol-forming and valine-sensitive + # α-acetolactate synthase of Enterobacter cloacae." + # World Journal of Microbiology and Biotechnology 19.5 (2003): 487-493. + + # ALS kcat is 26.9 +- 2.5[s-1] + # Choi, Myeong-Hyeon, et al. + # "Molecular cloning and expression of Enterobacter aerogenes α-acetolactate + # decarboxylase in pyruvate decarboxylase-deficient Saccharomyces cerevisiae + # for efficient 2, 3-butanediol production." + # Process biochemistry 51.2 (2016): 170-176. + + ALS_enz = Enzyme( + id = 'ALS', + kcat=26.9*3600, + kdeg=kdeg_enz, + composition = {'EBA_als':3} + ) + + # The third enzyme in the 2,3-butanediol pathway, AR, purified from A.aerogenes + # consists of four equal-size sub-unitsof 25kDa(40). + + # Blomqvist, Kristina, et al. + # "Characterization of the genes of the 2, 3-butanediol operons from Klebsiella + # terrigena and Enterobacter aerogenes." + # Journal of bacteriology 175.5 (1993): 1392-1404. + # & + # Stormer, Fredrik C. + # "[106] 2, 3-Butanediol biosynthetic system in Aerobacter aerogenes." + # Methods in enzymology. Vol. 41. Academic Press, 1975. 518-533. + + AR_enz = Enzyme( + id = 'AR', + kcat=get_average_kcat(), + kdeg=kdeg_enz, + composition = {'EBA_ar':4} + ) + + # Probably the catalytic activities are different + BDH_enz = Enzyme( + id = 'BDH', + kcat=get_average_kcat(), + kdeg=kdeg_enz, + composition = {'EBA_ar':4} + ) + + ############ + # Coupling # + ############ + + coupling_dict = {'ALS':[ALS_enz], 'AR':[AR_enz], 'BDH':[BDH_enz]} + + # AR and BDH are catalyzed by the same enzyme + + ########### + # Plasmid # + ########### + + if has_rnap: + rnap = get_rnap() + rnap.id = 'plasmid_rnap' + + nt_seq_ecoli = get_nt_sequences() + rnap_genes = list() + for g in rnap.composition: + this_gene = ExpressedGene(id='PLASMID_'+g, + name='{}, Plasmid', + sequence = nt_seq_ecoli[g]) + rnap_genes.append(this_gene) + else: + rnap_genes = [] + rnap = None + + gene_list = [ALS,AR] + rnap_genes + + plasmid_seq = pET \ + + fwd_als + ALS.sequence + rev_als \ + + fwd_ar + AR.sequence + rev_ar \ + + ''.join(str(g.sequence) for g in rnap_genes) + + my_plasmid = Plasmid(id_ = 'pET-AR-ALS', + sequence = plasmid_seq, + genes = gene_list, + reactions = reaction_list, + coupling_dict = coupling_dict, + rnap = rnap) + my_plasmid.build_default_mrna(kdeg_mrna) + + return my_plasmid \ No newline at end of file From 76ed38e265ee2dc527550daf7119b4448050c63c Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 16 Oct 2019 15:59:06 +0200 Subject: [PATCH 02/19] FIX: give a sequence to the dummy gene, so that gene.sequence has a non-0 length --- etfl/core/memodel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py index d596be5..e752557 100644 --- a/etfl/core/memodel.py +++ b/etfl/core/memodel.py @@ -301,10 +301,11 @@ def add_dummies(self, nt_ratios, mrna_kdeg, mrna_length, aa_ratios, add_interpolation_variables(self) # Create a dummy gene and override the sequences with input data + dummy_sequence = 'N'*mrna_length dummy_gene = ExpressedGene(id='dummy_gene', name='Dummy Gene', - sequence='') - dummy_gene._rna = 'N'*mrna_length + sequence=dummy_sequence) + dummy_gene._rna = dummy_sequence dummy_gene._peptide = 'X'*peptide_length self.add_genes([dummy_gene]) From eaf831bead82fd5919d10708fbc2e6d6a1cecccf Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 16 Oct 2019 16:00:04 +0200 Subject: [PATCH 03/19] FIX: return name rather than constraint object in check_binding_constraints --- etfl/optim/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etfl/optim/utils.py b/etfl/optim/utils.py index 8f881a1..acf43c3 100644 --- a/etfl/optim/utils.py +++ b/etfl/optim/utils.py @@ -456,7 +456,7 @@ def safe_optim(model): def get_binding_constraints(model, epsilon): if model.problem.__name__ == 'optlang.gurobi_interface': - return {kind:[c + return {kind:[c.name for c in these_cons if c.constraint._internal_constraint.Slack <= epsilon] for kind,these_cons in model._cons_kinds.items()} From 850c12846fa84eb8ac72075eabc76f27d3a87a2c Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 16 Oct 2019 16:01:08 +0200 Subject: [PATCH 04/19] ENH: add option to revert model after chebyshev center computation, or not. Useful for getting primal, slack, etc from the model, as they are erased if the model is modified --- etfl/analysis/dynamic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/etfl/analysis/dynamic.py b/etfl/analysis/dynamic.py index ad34f9a..0c5dfc7 100644 --- a/etfl/analysis/dynamic.py +++ b/etfl/analysis/dynamic.py @@ -361,7 +361,7 @@ def update_medium(t, Xi, Si, dmodel, medium_fun, timestep): return X,S -def compute_center(dmodel, provided_solution=None): +def compute_center(dmodel, provided_solution=None, revert_changes=True): """ Fixes growth to be above computed lower bound, finds chebyshev center, resets the model, returns solution data @@ -403,8 +403,9 @@ def compute_center(dmodel, provided_solution=None): dmodel.optimize() chebyshev_sol = dmodel.solution - dmodel.growth_reaction.lower_bound = prev_lb - dmodel.objective = prev_obj + if revert_changes: + dmodel.growth_reaction.lower_bound = prev_lb + dmodel.objective = prev_obj return chebyshev_sol From d1ce0e61cc7cc2733e1ae8ca7b056fff1a505b1c Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 16 Oct 2019 16:02:15 +0200 Subject: [PATCH 05/19] ENH: add a debug plasmid --- work/plasmids/debug.yaml | 26 +++++++++++++++++ work/plasmids/vector.py | 62 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 work/plasmids/debug.yaml diff --git a/work/plasmids/debug.yaml b/work/plasmids/debug.yaml new file mode 100644 index 0000000..90cbfca --- /dev/null +++ b/work/plasmids/debug.yaml @@ -0,0 +1,26 @@ +model: 'debug' +tag: 'debug' +simulation: + copy_number: 1 + vector: 'debug' + add_rnap: yes +assumptions: + growth_rate: 'auto' +analysis: + # Export values ? + export_peptides: yes + export_mrna: yes + export_fluxes: yes + binding_constraints: + epsilon: 1.0e-8 + # Perform Chebyshev centering ? + # chebyshev: + # chebyshev_include: + # - ForwardCatalyticConstraint + # - BackwardCatalyticConstraint + # - RNAPAllocation + # - ExpressionCoupling + # inplace: false +options: + verbose: false + inplace: true diff --git a/work/plasmids/vector.py b/work/plasmids/vector.py index 70fa388..979eb89 100644 --- a/work/plasmids/vector.py +++ b/work/plasmids/vector.py @@ -9,6 +9,66 @@ from cobra.core import Metabolite, Reaction +def get_debug_plasmid(model, has_rnap=False): + + h2o_c = model.metabolites.h2o_c + h2o_e = model.metabolites.h2o_e + + # CO2 Exchange + h2o_tpp = Reaction(id='h2otpp', name='H2O Exchange') + h2o_tpp.add_metabolites({ + h2o_c: 1, + h2o_e: -1, + }) + + reaction_list = [h2o_tpp] + + ############# + # Genes # + ############# + + DBG = ExpressedGene(id='DBG_gene', + name='Debug synthase', + sequence='AATTTCGGTTGA'.lower()) + + ############# + # Enzymes # + ############# + + DBG_enz = Enzyme( + id='DBG', + kcat=65 * 3600, + kdeg=kdeg_enz, + composition={'DBG_gene': 3} + ) + + ############ + # Coupling # + ############ + + coupling_dict = {'h2otpp': [DBG_enz]} + + ########### + # Plasmid # + ########### + + if 1: + rnap_genes = [] + rnap = None + + gene_list = [DBG] + rnap_genes + + plasmid_seq = DBG.sequence + + my_plasmid = Plasmid(id_='debug', + sequence=plasmid_seq, + genes=gene_list, + reactions=reaction_list, + coupling_dict=coupling_dict, + rnap=rnap) + my_plasmid.build_default_mrna(kdeg_mrna) + + return my_plasmid def get_bdo_plasmid(model, has_rnap=False): @@ -212,4 +272,4 @@ def get_bdo_plasmid(model, has_rnap=False): rnap = rnap) my_plasmid.build_default_mrna(kdeg_mrna) - return my_plasmid \ No newline at end of file + return my_plasmid From 1d75ea611e1b3b50e70aa489a531f38b7c143cc4 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Wed, 16 Oct 2019 16:04:31 +0200 Subject: [PATCH 06/19] ENH: Wrapper for performing plasmid integration via YAML-spec --- work/plasmids/analysis.py | 73 ++++++++++++++++++++++++++++++++++++--- work/plasmids/main.py | 22 +++++++++--- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py index a16fb53..8861c5b 100644 --- a/work/plasmids/analysis.py +++ b/work/plasmids/analysis.py @@ -1,5 +1,13 @@ from etfl.optim.utils import check_solution from etfl.optim.variables import mRNAVariable, EnzymeVariable +from etfl.analysis.utils import enzymes_to_peptides_conc +from etfl.analysis.summary import print_standard_sol + +from pytfa.io.viz import export_reactions_for_escher + +import pandas as pd + +from collections import OrderedDict def chebyshev(model, chebyshev_include, inplace=True, big_m=1000): @@ -10,6 +18,10 @@ def chebyshev(model, chebyshev_include, inplace=True, big_m=1000): chebyshev_variables = model.get_variables_of_type(mRNAVariable) chebyshev_variables += model.get_variables_of_type(EnzymeVariable) + initial_solution = model.optimize() + + the_obj = model.objective + chebyshev_center(model, chebyshev_variables, inplace=inplace, big_m=big_m, @@ -21,19 +33,70 @@ def chebyshev(model, chebyshev_include, inplace=True, big_m=1000): # We want to compute the center under the constraint of the growth at the # initial solution. - chebyshev_sol = compute_center(model, provided_solution=initial_solution) + chebyshev_sol = compute_center(model, provided_solution=initial_solution, + revert_changes=False) print_standard_sol(model) + return chebyshev_sol def export_mrna(model, tag, solution=None): - solution = check_solution(solution) + solution = check_solution(model, solution) - mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('id') + mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('name') data = solution.raw mrna_data = data[mrna_var_ids] mrna_data.name = tag - filename = 'outputs/' + tag + 'mrna' + '.csv' + filename = 'outputs/' + tag + '_mrna' + '.csv' + + mrna_data.to_csv(filename, header=True) + return mrna_data + +def export_peptides(model, tag, solution=None): + solution = check_solution(model, solution) + + enz_var_ids = model.get_variables_of_type(EnzymeVariable).list_attr('name') + + data = solution.raw + enz_data = data[enz_var_ids] + enz_data.name = tag + + pep_data = enzymes_to_peptides_conc(model=model, enzyme_conc=enz_data) + pep_data = pd.DataFrame.from_dict(pep_data, orient='index') + + filename = 'outputs/' + tag + '_pep' + '.csv' + + pep_data.to_csv(filename, header=True) + return pep_data + + +def export_fluxes(model, tag, solution=None): + solution = check_solution(model, solution) + + filename = 'outputs/' + tag + '_rxn' + '.csv' + + export_reactions_for_escher(model, solution.raw, filename) + return solution.fluxes + + +def binding_constraints(model, tag, epsilon, solution=None): + from etfl.optim.utils import get_binding_constraints + # solution = check_solution(model, solution) + model.optimize() + + filename = 'outputs/' + tag + '_binding_cons' + '.csv' + + binding_constraints = get_binding_constraints(model, epsilon) + + name2bind = OrderedDict() + + for cons_kind, cons_names in binding_constraints.items(): + for the_name in cons_names: + name2bind[the_name] = cons_kind + + df = pd.DataFrame.from_dict(name2bind, orient='index') + df.columns = ['name','type'] - mrna_data.to_csv(filename, header=True) \ No newline at end of file + df.to_csv(filename, header=True) + return df diff --git a/work/plasmids/main.py b/work/plasmids/main.py index aa31561..c0196e6 100644 --- a/work/plasmids/main.py +++ b/work/plasmids/main.py @@ -17,7 +17,8 @@ import sys from utils import read_config -from vector import get_bdo_plasmid +from vector import get_bdo_plasmid, get_debug_plasmid +import analysis if len(sys.argv)<=1: raise ValueError('Please provide a YAML configuration file') @@ -26,7 +27,8 @@ print('Using configuration file: {}'.format(CONFIG)) -vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid} +vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid, + 'debug':get_debug_plasmid} if __name__ == '__main__': config = read_config(CONFIG) @@ -57,10 +59,20 @@ transmodel.optimize() print_standard_sol(transmodel) - for analysis,arguments in config['analysis']: + outputs = dict() + + if 'chebyshev' in config['analysis']: + arguments = config['analysis'].pop('chebyshev') + arguments['model'] = transmodel + # arguments['tag'] = config['tag'] + outputs['chebyshev'] = analysis.chebyshev(**arguments) + + for analysis_str,arguments in config['analysis'].items(): if arguments != False: + if arguments == True: + arguments = dict() arguments['model'] = transmodel arguments['tag'] = config['tag'] - analysis(**arguments) + outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments) - # Check antibiotic resistance is expressed ? \ No newline at end of file + # Check antibiotic resistance is expressed ? From d8a453241a60d9106f3c8efb7f8294f9643ac220 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 25 Oct 2019 09:28:18 +0200 Subject: [PATCH 07/19] ENH: added objective argument to compute center in case of non-single radius --- etfl/analysis/dynamic.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/etfl/analysis/dynamic.py b/etfl/analysis/dynamic.py index 0c5dfc7..4f90db0 100644 --- a/etfl/analysis/dynamic.py +++ b/etfl/analysis/dynamic.py @@ -361,12 +361,14 @@ def update_medium(t, Xi, Si, dmodel, medium_fun, timestep): return X,S -def compute_center(dmodel, provided_solution=None, revert_changes=True): +def compute_center(dmodel, objective, + provided_solution=None, revert_changes=True): """ Fixes growth to be above computed lower bound, finds chebyshev center, resets the model, returns solution data :param dmodel: + :param objective: the radius to maximize :return: """ @@ -386,12 +388,13 @@ def compute_center(dmodel, provided_solution=None, revert_changes=True): fix_growth(dmodel, the_solution) + dmodel.objective = objective #dmodel.chebyshev_radius.radius.variable try: - dmodel.objective = dmodel.chebyshev_radius.radius.variable dmodel.optimize() chebyshev_sol = dmodel.solution - release_growth(dmodel) + if revert_changes: + release_growth(dmodel) except AttributeError: #does not solve dmodel.logger.warning('Chebyshev solution at fixed growth solution ' 'not found - relaxing integers and solving ' @@ -475,7 +478,9 @@ def run_dynamic_etfl(model, timestep, tfinal, uptake_fun, medium_fun, # We want to compute the center under the constraint of the growth at the # initial solution. - chebyshev_sol = compute_center(dmodel,provided_solution=initial_solution) + chebyshev_sol = compute_center(dmodel, + objective = dmodel.chebyshev_radius.radius.variable, + provided_solution=initial_solution) # Only now do we add the dynamic variable constraints. add_dynamic_variables_constraints(dmodel, timestep, dynamic_constraints) @@ -562,4 +567,4 @@ def run_dynamic_etfl(model, timestep, tfinal, uptake_fun, medium_fun, return wrap_time_sol(var_solutions, obs_values) def wrap_time_sol(var_solutions, obs_values): - return pd.concat([var_solutions,obs_values], axis=0) \ No newline at end of file + return pd.concat([var_solutions,obs_values], axis=0) From af3012ab0512ece9aaf76d48e7733770fe254794 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 25 Oct 2019 09:28:42 +0200 Subject: [PATCH 08/19] MNT: Cleanup --- etfl/optim/utils.py | 4 +++- etfl/tests/small_model.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/etfl/optim/utils.py b/etfl/optim/utils.py index acf43c3..926e228 100644 --- a/etfl/optim/utils.py +++ b/etfl/optim/utils.py @@ -455,8 +455,10 @@ def safe_optim(model): def get_binding_constraints(model, epsilon): - if model.problem.__name__ == 'optlang.gurobi_interface': + if is_gurobi(model): return {kind:[c.name for c in these_cons if c.constraint._internal_constraint.Slack <= epsilon] for kind,these_cons in model._cons_kinds.items()} + else: + raise(NotImplementedError) diff --git a/etfl/tests/small_model.py b/etfl/tests/small_model.py index 7e4d47a..0a4d51e 100644 --- a/etfl/tests/small_model.py +++ b/etfl/tests/small_model.py @@ -38,11 +38,11 @@ GUROBI = 'optlang-gurobi' GLPK = 'optlang-glpk' -solver = GLPK +DEFAULT_SOLVER = GLPK aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict() essentials = get_essentials() -def create_fba_model(solver = GLPK): +def create_fba_model(solver = DEFAULT_SOLVER): # g6p_c = cobra.Metabolite(id = 'g6p_c', formula = 'C6H13O9P') # f6p_c = cobra.Metabolite(id = 'f6p_c', formula = 'C6H13O9P') @@ -125,6 +125,7 @@ def create_etfl_model(has_thermo, has_neidhardt, n_mu_bins = 64, mu_max = 3, optimize = True, + solver=DEFAULT_SOLVER, ): #------------------------------------------------------------ # Initialisation From fb67c9bc2c55a6b32d12f8a3cc803f43c18ef87e Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 25 Oct 2019 09:30:26 +0200 Subject: [PATCH 09/19] ENH: chebyshev 3 separate centers + slacks export + folder export --- work/plasmids/analysis.py | 139 +++++++++++++++---- work/plasmids/debug.yaml | 21 ++- work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml | 19 ++- work/plasmids/main.py | 13 +- 4 files changed, 152 insertions(+), 40 deletions(-) diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py index 8861c5b..c3e3630 100644 --- a/work/plasmids/analysis.py +++ b/work/plasmids/analysis.py @@ -1,5 +1,5 @@ from etfl.optim.utils import check_solution -from etfl.optim.variables import mRNAVariable, EnzymeVariable +from etfl.optim.variables import mRNAVariable, EnzymeVariable, DNAVariable, RibosomeUsage, RNAPUsage from etfl.analysis.utils import enzymes_to_peptides_conc from etfl.analysis.summary import print_standard_sol @@ -9,33 +9,75 @@ from collections import OrderedDict +def mkdir(directory): + import os + full_dir = 'outputs/' + directory + if not os.path.exists(full_dir): + os.makedirs(full_dir) def chebyshev(model, chebyshev_include, inplace=True, big_m=1000): - from etfl.optim.variables import mRNAVariable,EnzymeVariable + from etfl.optim.variables import mRNAVariable,EnzymeVariable, RibosomeUsage, RNAPUsage from etfl.optim.constraints import ForwardCatalyticConstraint,BackwardCatalyticConstraint - from etfl.analysis.dynamic import chebyshev_center, compute_center - - chebyshev_variables = model.get_variables_of_type(mRNAVariable) - chebyshev_variables += model.get_variables_of_type(EnzymeVariable) - - initial_solution = model.optimize() - - the_obj = model.objective - - chebyshev_center(model, chebyshev_variables, - inplace=inplace, - big_m=big_m, - include=chebyshev_include) - - # Revert to the previous objective, as chebyshev_center sets it to maximize - # the radius - model.objective = the_obj + from pytfa.analysis.chebyshev import chebyshev_transform, \ + get_cons_var_classes, get_variables + from etfl.analysis.dynamic import compute_center + + if not inplace: + new = model.copy() + else: + new = model + + new.objective = new.growth_reaction + initial_solution = new.optimize() + print('Initial solution:') + print_standard_sol(new,solution=initial_solution) + + # List of radii to add + radii = list() + + the_obj = new.objective + + for this_cons_type in chebyshev_include: + if this_cons_type == 'CatalyticConstraint': + # chebyshev_variables = new.get_variables_of_type(EnzymeVariable) + # vars = get_variables(new, chebyshev_variables) + include_list = get_cons_var_classes(new, ['ForwardCatalyticConstraint', 'BackwardCatalyticConstraint'], + type = 'cons') + this_r_id = 'catalytic' + elif this_cons_type =='ExpressionCoupling': + # chebyshev_variables = new.get_variables_of_type(mRNAVariable) + # chebyshev_variables = new.get_variables_of_type(RibosomeUsage) + # vars = get_variables(new, chebyshev_variables) + include_list = get_cons_var_classes(new, [this_cons_type], type='cons') + this_r_id = 'translation' + elif this_cons_type =='RNAPAllocation': + # chebyshev_variables = new.get_variables_of_type(DNAVariable) + # chebyshev_variables = new.get_variables_of_type(RNAPUsage) + # vars = get_variables(new, chebyshev_variables) + include_list = get_cons_var_classes(new, [this_cons_type], type='cons') + this_r_id = 'transcription' + else: + include_list = get_cons_var_classes(new, [this_cons_type], type='cons') + this_r_id = this_cons_type + + + r = chebyshev_transform(model=new, + vars=list(), + # vars=vars, + include_list=include_list, + exclude_list=list(), + radius_id=this_r_id) + radii.append(r) + + regularisation = 0.001*new.enzymes.dummy_enzyme.scaled_concentration + cheby_obj = sum(radii) + regularisation # We want to compute the center under the constraint of the growth at the # initial solution. - chebyshev_sol = compute_center(model, provided_solution=initial_solution, + chebyshev_sol = compute_center(new, provided_solution=initial_solution, + objective=cheby_obj, revert_changes=False) - print_standard_sol(model) + print_standard_sol(new, solution=chebyshev_sol) return chebyshev_sol @@ -48,7 +90,8 @@ def export_mrna(model, tag, solution=None): mrna_data = data[mrna_var_ids] mrna_data.name = tag - filename = 'outputs/' + tag + '_mrna' + '.csv' + mkdir(tag) + filename = 'outputs/' + tag + '/mrna' + '.csv' mrna_data.to_csv(filename, header=True) return mrna_data @@ -65,7 +108,8 @@ def export_peptides(model, tag, solution=None): pep_data = enzymes_to_peptides_conc(model=model, enzyme_conc=enz_data) pep_data = pd.DataFrame.from_dict(pep_data, orient='index') - filename = 'outputs/' + tag + '_pep' + '.csv' + mkdir(tag) + filename = 'outputs/' + tag + '/pep' + '.csv' pep_data.to_csv(filename, header=True) return pep_data @@ -74,7 +118,8 @@ def export_peptides(model, tag, solution=None): def export_fluxes(model, tag, solution=None): solution = check_solution(model, solution) - filename = 'outputs/' + tag + '_rxn' + '.csv' + mkdir(tag) + filename = 'outputs/' + tag + '/rxn' + '.csv' export_reactions_for_escher(model, solution.raw, filename) return solution.fluxes @@ -85,7 +130,8 @@ def binding_constraints(model, tag, epsilon, solution=None): # solution = check_solution(model, solution) model.optimize() - filename = 'outputs/' + tag + '_binding_cons' + '.csv' + mkdir(tag) + filename = 'outputs/' + tag + '/binding_cons' + '.csv' binding_constraints = get_binding_constraints(model, epsilon) @@ -96,7 +142,48 @@ def binding_constraints(model, tag, epsilon, solution=None): name2bind[the_name] = cons_kind df = pd.DataFrame.from_dict(name2bind, orient='index') - df.columns = ['name','type'] + df.columns = ['type'] + df.index.name = 'name' + + df.to_csv(filename, header=True) + return df + + +def export_slacks(model, tag, constraint_types, solution=None): + from pytfa.analysis.chebyshev import get_cons_var_classes, ChebyshevRadius + + solution = check_solution(model, solution) + include_list = get_cons_var_classes(model, constraint_types, + type = 'cons') + cons_dict = {} + + if model.problem.__name__ == 'optlang.gurobi_interface': + for cons_class in include_list: + these_cons = model.get_constraints_of_type(cons_class) + cons_dict.update({c.name:(c.constraint._internal_constraint.Slack, + get_radius(c.expr), + c.__class__.__name__) + for c in these_cons}) + + mkdir(tag) + filename = 'outputs/' + tag + '/slacks' + '.csv' + + df = pd.DataFrame.from_dict(cons_dict, orient='index') + df.columns = ['slack','radius','type'] + df.index.name = 'name' df.to_csv(filename, header=True) return df + +def get_radius(expr): + """ + Returns the value of the sum of Chebyshev radii in a constraint expression + + :param cons: + :return: + """ + from pytfa.analysis.chebyshev import ChebyshevRadius + variables = expr.free_symbols + total = sum([v.primal for v in variables + if v.name.startswith(ChebyshevRadius.prefix)]) + return total \ No newline at end of file diff --git a/work/plasmids/debug.yaml b/work/plasmids/debug.yaml index 90cbfca..ce6f2aa 100644 --- a/work/plasmids/debug.yaml +++ b/work/plasmids/debug.yaml @@ -13,14 +13,21 @@ analysis: export_fluxes: yes binding_constraints: epsilon: 1.0e-8 + export_slacks: + constraint_types: + - ForwardCatalyticConstraint + - BackwardCatalyticConstraint + - SynthesisConstraint + - RNAPAllocation + - ExpressionCoupling # Perform Chebyshev centering ? - # chebyshev: - # chebyshev_include: - # - ForwardCatalyticConstraint - # - BackwardCatalyticConstraint - # - RNAPAllocation - # - ExpressionCoupling - # inplace: false + chebyshev: + chebyshev_include: + - CatalyticConstraint + - RNAPAllocation + - ExpressionCoupling + inplace: true options: verbose: false inplace: true + solver: gurobi diff --git a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml index aab6683..8890f1a 100644 --- a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml +++ b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml @@ -1,5 +1,5 @@ -model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191007_142013.json' -tag: 'vEFL_BDO_1c_RNAP' +model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191014_124839.json' +tag: 'vEFL_BDO_1c_RNAP_cheby_sum_reg' simulation: copy_number: 1 vector: 'plasmid_pET-AR-ALS' @@ -11,14 +11,23 @@ analysis: export_peptides: yes export_mrna: yes export_fluxes: yes + binding_constraints: + epsilon: 1.0e-8 + export_slacks: + constraint_types: + - ForwardCatalyticConstraint + - BackwardCatalyticConstraint + - SynthesisConstraint + - RNAPAllocation + - ExpressionCoupling # Perform Chebyshev centering ? chebyshev: chebyshev_include: - - ForwardCatalyticConstraint - - BackwardCatalyticConstraint + - CatalyticConstraint - RNAPAllocation - ExpressionCoupling - inplace: false + inplace: true options: verbose: false inplace: true + solver: gurobi diff --git a/work/plasmids/main.py b/work/plasmids/main.py index c0196e6..856f860 100644 --- a/work/plasmids/main.py +++ b/work/plasmids/main.py @@ -30,14 +30,22 @@ vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid, 'debug':get_debug_plasmid} +def write_conf(model, conf): + filename = 'outputs/' + conf['tag'] + '/config.yaml' + conf['objective'] = model.objective.expression + with open(filename,'w') as fid: + yaml.dump(config, fid) + + + if __name__ == '__main__': config = read_config(CONFIG) if config['model'] != 'debug': - model = load_json_model(config['model']) + model = load_json_model(config['model'],solver=config['options']['solver']) else: from etfl.tests.small_model import create_etfl_model - model = create_etfl_model(0,1) + model = create_etfl_model(0,1, solver=config['options']['solver']) standard_solver_config(model) model.solver.configuration.verbosity = config['options']['verbose'] @@ -76,3 +84,4 @@ outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments) # Check antibiotic resistance is expressed ? + write_conf(transmodel, config) From 860e01027dc9ee4700425d4dd7dd02bcf91f8ae7 Mon Sep 17 00:00:00 2001 From: Omid Date: Wed, 30 Oct 2019 10:16:36 +0100 Subject: [PATCH 10/19] FIX: fix sequence type on reloading a model. --- etfl/io/dict.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/etfl/io/dict.py b/etfl/io/dict.py index 176bf17..09ef703 100644 --- a/etfl/io/dict.py +++ b/etfl/io/dict.py @@ -12,6 +12,9 @@ from tqdm import tqdm +from Bio.Seq import Seq +from Bio.Alphabet import DNAAlphabet, RNAAlphabet, ProteinAlphabet + import cobra.io.dict as cbd from cobra.exceptions import SolverNotFound from optlang.util import expr_to_json, parse_expr @@ -892,8 +895,8 @@ def find_genes_from_dict(new, obj): g = replace_by_me_gene(new, gene_dict['id'], str(sequence)) if key == 'expressed_genes': # Newer models - g._rna = gene_dict['rna'] - g._peptide = gene_dict['peptide'] + g._rna = Seq(gene_dict['rna'], alphabet=RNAAlphabet()) + g._peptide = Seq(gene_dict['peptide'], alphabet=ProteinAlphabet()) g._copy_number = gene_dict['copy_number'] g._transcribed_by = [new.enzymes.get_by_id(e) for e in gene_dict['transcribed_by']] \ From a8fb5bf8dc36b2337d585cac72f566f2a310faf9 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:05:57 +0100 Subject: [PATCH 11/19] ENH: changed the hack to create a transmodel from an ETFL model to make it more robust --- etfl/core/vector.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/etfl/core/vector.py b/etfl/core/vector.py index 0fc13eb..8a9be52 100644 --- a/etfl/core/vector.py +++ b/etfl/core/vector.py @@ -86,26 +86,35 @@ def make_plasmid(gene_list): class TransModel(MEModel): def __init__(self, me_model, inplace = True): + """ + Hack to subclass instantiated object: + https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass + + :param me_model: + :param inplace: + """ if not inplace: new = me_model.copy() + self.__dict__ = new.__dict__ else: - new = me_model + # new = me_model + self.__dict__ = me_model.__dict__ - self._me_model = new + # self._me_model = new self._has_transcription_changed = False self._has_translation_changed = False self.vectors = dict() self.vector_copy_number = dict() - def __getattr__(self,attr): - """ - Hack to subclass instantiated object: - https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass - - :param attr: - :return: - """ - return getattr(self._me_model,attr) + # def __getattr__(self,attr): + # """ + # Hack to subclass instantiated object: + # https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass + # + # :param attr: + # :return: + # """ + # return getattr(self._me_model,attr) def add_vector(self, vector, copy_number=1): From 505cc0f6c00923aeaf1e6647c4b8ba027b23d77d Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:07:03 +0100 Subject: [PATCH 12/19] ENH: global improvmements in all parts of vector.py --- etfl/core/vector.py | 62 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/etfl/core/vector.py b/etfl/core/vector.py index 8a9be52..b000a37 100644 --- a/etfl/core/vector.py +++ b/etfl/core/vector.py @@ -15,7 +15,7 @@ DNA_WEIGHT_CONS_ID, MRNA_WEIGHT_VAR_ID, PROT_WEIGHT_VAR_ID, \ DNA_WEIGHT_VAR_ID, DNA_FORMATION_RXN_ID,\ define_prot_weight_constraint, define_mrna_weight_constraint, \ - define_dna_weight_constraint + define_dna_weight_constraint, get_dna_synthesis_mets from ..optim.constraints import tRNAMassBalance, InterpolationConstraint, \ TotalCapacity, EnzymeRatio from ..optim.variables import InterpolationVariable @@ -130,17 +130,20 @@ def add_vector(self, vector, if vector.ribosome is not None: self.add_vector_ribosome(vector.ribosome) - # Edit the gene copy number by the number of plasmids. - for g in vector.genes: - g.copy_number *= copy_number - self.add_genes(vector.genes) + # This adds the peptides as well: self.add_nucleotide_sequences({g.id:g.sequence for g in vector.genes}) + # Edit the gene copy number by the number of plasmids. + # TODO: make this more robust / Edit add_nt_seq to include gene copy # + for g in vector.genes: + self.genes.get_by_id(g.id).copy_number = g.copy_number * copy_number + self.express_genes(vector.genes) # Add the mRNAs self.add_mrnas(vector.mrna_dict.values()) + self._push_queue() # @@ -167,13 +170,12 @@ def add_vector(self, vector, # recompute mRNA-dependent constraints self.recompute_trna_balances() - if self._has_transcription_changed: - self.recompute_transcription() - if self._has_translation_changed: - self.recompute_translation() + self.recompute_transcription() + self.recompute_translation() # This needs to account for new DNA self.recompute_allocation() + self._push_queue() def recompute_trna_balances(self): @@ -212,10 +214,6 @@ def add_vector_RNAP(self, rnap): # This adds the RNAP as an enzyme, and also enforces its free ratio self.add_rnap(rnap, free_ratio=free_rnap_ratio) - self._has_transcription_changed = True - - pass - def add_vector_ribosome(self, ribosome): """ Adds the vector's ribosome to the ribosome pool of the cell @@ -225,25 +223,19 @@ def add_vector_ribosome(self, ribosome): """ self.add_ribosome(ribosome, free_ratio=free_rib_ratio) - self._has_translation_changed = True - - pass - def recompute_translation(self): """ :return: """ - #1.1 Remove translation catalytic constraints + for rib_id in self.ribosome: + #1.1 Find the RNAP capacity constraint + cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rib_id) - #1.2 Apply new translation catalytic constraints - - #2.1 Remove former ribosome balance - - #2.2 Apply new ribosome balance - - pass + #1.2 Edit the RNAP capacity constraint + new_total_capacity = self._get_rib_total_capacity() + cons.change_expr(new_total_capacity) def recompute_transcription(self): """ @@ -251,12 +243,15 @@ def recompute_transcription(self): :return: """ - #1.1 Find the RNAP capacity constraint - cons = self.get_constraints_of_type(TotalCapacity).get_by_id('rnap') + # TODO: No need to recompute for the one we just added. + # Keep tabs ? use an except list + for rnap_id in self.rnap: + # 1.1 Find the RNAP capacity constraint + cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rnap_id) - #1.2 Edit the RNAP capacity constraint - new_total_capacity = self._get_rnap_total_capacity() - cons.change_expr(new_total_capacity) + # 1.2 Edit the RNAP capacity constraint + new_total_capacity = self._get_rnap_total_capacity() + cons.change_expr(new_total_capacity) def recompute_allocation(self): """ @@ -336,10 +331,9 @@ def recalculate_dna(self, dna_ggdw): new_gc = (wt_chromosome_len * wt_gc + vector_dna_len * vector_gc) / new_chromosome_len #3. Update ATGC stoichiometries - dna_formation_reaction.add_metabolites({ - v: -1 * new_chromosome_len * (new_gc if k.lower() in 'gc' else 1 - new_gc) - for k, v in self.dna_nucleotides.items() - }, combine = False) + ppi = self.essentials['ppi'] + mets = get_dna_synthesis_mets(self, new_chromosome_len, new_gc, ppi) + dna_formation_reaction.add_metabolites(mets, combine = False) ##. Redefine the DNA weight constraint dna.len = new_chromosome_len From 4420e873a4d5adf11f78a1c03db685539203742c Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:09:22 +0100 Subject: [PATCH 13/19] REF: cleand up getting stoichiometries and constraints fro DNA and Ribosome --- etfl/core/allocation.py | 17 +++++++++++------ etfl/core/memodel.py | 35 ++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/etfl/core/allocation.py b/etfl/core/allocation.py index ed82e79..e7e0bbd 100644 --- a/etfl/core/allocation.py +++ b/etfl/core/allocation.py @@ -399,12 +399,7 @@ def add_dna_mass_requirement(model, mu_values, dna_rel, gc_ratio, scaled=True) model.add_reactions([dna_formation]) - # In this formulation, we make 1 unit of the whole chromosome with NTPs - g = gc_ratio - mets = {v: -1 * chromosome_len * (g if k.lower() in 'gc' else 1 - g) - for k, v in model.dna_nucleotides.items()} - # Don't forget to release ppi (2 ppi per bp) - mets[ppi] = 2 * chromosome_len + mets = get_dna_synthesis_mets(model, chromosome_len, gc_ratio, ppi) dna_formation.add_metabolites(mets) @@ -443,6 +438,16 @@ def add_dna_mass_requirement(model, mu_values, dna_rel, gc_ratio, model.regenerate_constraints() +def get_dna_synthesis_mets(model, chromosome_len, gc_ratio, ppi): + # In this formulation, we make 1 unit of the whole chromosome with NTPs + g = gc_ratio + mets = {v: -1 * chromosome_len * (g if k.lower() in 'gc' else 1 - g) + for k, v in model.dna_nucleotides.items()} + # Don't forget to release ppi (2 ppi per bp) + mets[ppi] = 2 * chromosome_len + return mets + + def define_dna_weight_constraint(model, dna, dna_ggdw, gc_content, chromosome_len): # DNA mass (BioPython has g.mol^-1, while we are in mmol) ma = molecular_weight('A', seq_type='DNA') / 1000 # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1 diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py index e752557..2d69086 100644 --- a/etfl/core/memodel.py +++ b/etfl/core/memodel.py @@ -1616,23 +1616,7 @@ def _populate_ribosomes(self): # 3 -> Add ribosomal capacity constraint self.regenerate_variables() - free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(x) - for x in self.ribosome] - # CATCH : This is summing ~1500+ variable objects, and for a reason - # sympy does not like it. Let's cut it in smaller chunks and sum - # afterwards - # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) - all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) - - # sum_RPs = chunk_sum(all_ribosome_usage) - sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage]) - - # The total RNAP capacity constraint looks like - # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) - ribo_usage = sum_RPs \ - + sum([x.unscaled for x in free_ribosome]) \ - - sum([x.concentration for x in self.ribosome.values()]) - ribo_usage /= min([x.scaling_factor for x in self.ribosome.values()]) + ribo_usage = self._get_rib_total_capacity() # For nondimensionalization # ribo_usage = (sum_RPs + self.Rf.unscaled - self.ribosome.concentration) \ # / self.ribosome.scaling_factor @@ -1650,6 +1634,23 @@ def _populate_ribosomes(self): self.regenerate_constraints() self.regenerate_variables() + def _get_rib_total_capacity(self): + free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(x) + for x in self.ribosome] + # CATCH : This is summing ~1500+ variable objects, and for a reason + # sympy does not like it. Let's cut it in smaller chunks and sum + # afterwards + # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) + all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) + # sum_RPs = chunk_sum(all_ribosome_usage) + sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage]) + # The total RNAP capacity constraint looks like + # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) + ribo_usage = sum_RPs \ + + sum([x.unscaled for x in free_ribosome]) \ + - sum([x.concentration for x in self.ribosome.values()]) + ribo_usage /= min([x.scaling_factor for x in self.ribosome.values()]) + return ribo_usage def apply_ribosomal_catalytic_constraint(self, reaction): """ From c287b898a8f258cc22779b710eba8e3b21e4585d Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:09:49 +0100 Subject: [PATCH 14/19] FIX: more robust handling of genes --- etfl/core/genes.py | 6 +++--- etfl/core/macromolecule.py | 8 ++++++-- etfl/core/memodel.py | 6 +++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/etfl/core/genes.py b/etfl/core/genes.py index 29e5584..c0c1267 100644 --- a/etfl/core/genes.py +++ b/etfl/core/genes.py @@ -47,7 +47,7 @@ def __init__(self, id, name, sequence, copy_number=1, self._rna = '' self._peptide = '' - self._copy_number = copy_number + self._copy_number = int(copy_number) self._transcribed_by = transcribed_by self._translated_by = translated_by @@ -61,10 +61,10 @@ def copy_number(self, value): if value != self._copy_number: if self.model is None: # Easy - self._copy_number = value + self._copy_number = int(value) else: # We need to make the model change the RNAP allocation - self._copy_number = value + self._copy_number = int(value) self.model.edit_gene_copy_number(self.id) else: # Nothing to do here :) diff --git a/etfl/core/macromolecule.py b/etfl/core/macromolecule.py index 894c254..ade46fc 100644 --- a/etfl/core/macromolecule.py +++ b/etfl/core/macromolecule.py @@ -16,11 +16,12 @@ class Macromolecule(Species, ABC): - def __init__(self, id=None, kdeg=0, *args, **kwargs): + def __init__(self, id=None, kdeg=0, scaling_factor = None, *args, **kwargs): Species.__init__(self, id = id, *args, **kwargs) self.kdeg = kdeg self.degradation = None + self._scaling_factor = scaling_factor @abstractmethod def init_variable(self, queue=False): @@ -95,4 +96,7 @@ def molecular_weight(self): @property def scaling_factor(self): - return 1/self.molecular_weight \ No newline at end of file + # Calculate it only once + if self._scaling_factor is None: + self._scaling_factor = 1/self.molecular_weight + return self._scaling_factor \ No newline at end of file diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py index 2d69086..a5fce7d 100644 --- a/etfl/core/memodel.py +++ b/etfl/core/memodel.py @@ -434,7 +434,7 @@ def express_genes(self, gene_list): if isinstance(gene, str): the_gene = self.genes.get_by_id(gene) else: - the_gene = gene + the_gene = self.genes.get_by_id(gene.id) if not isinstance(the_gene, ExpressedGene): continue @@ -1309,10 +1309,10 @@ def edit_gene_copy_number(self, gene_id): rnap_alloc = self.get_constraints_of_type(RNAPAllocation) rnap_usage = self.get_variables_of_type(RNAPUsage) - if self.id in rnap_alloc and self.id in rnap_usage: + if gene_id in rnap_alloc and gene_id in rnap_usage: # We need to edit the variable # Modify the polymerase constraint - cons = rnap_alloc.get_by_id(self.id) + cons = rnap_alloc.get_by_id(gene_id) new_expr = self._get_rnap_allocation_expression(the_gene) cons.change_expr(new_expr) self.logger.debug('Changed RNAP allocation for gene {}'.format(gene_id)) From ad63220d5efe58e916547435e2c6fed033497d71 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:11:48 +0100 Subject: [PATCH 15/19] ENH: Adding RNAP binding equilibrium constraints --- etfl/core/equilibrium.py | 121 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 etfl/core/equilibrium.py diff --git a/etfl/core/equilibrium.py b/etfl/core/equilibrium.py new file mode 100644 index 0000000..339b275 --- /dev/null +++ b/etfl/core/equilibrium.py @@ -0,0 +1,121 @@ +from .genes import ExpressedGene +from ..optim.variables import RNAPUsage, FreeEnzyme, BinaryActivator, \ + LinearizationVariable +from ..optim.constraints import GeneConstraint, EnzymeRatio,\ + LinearizationConstraint +from pytfa.optim.reformulation import petersen_linearization + + +class RNAPBindingEquilibrium(GeneConstraint): + prefix = 'PBEq_' + + +def linearize_product(model, b, x, queue=False): + """ + + :param model: + :param b: the binary variable + :param x: the continuous variable + :param queue: whether to queue the variables and constraints made + :return: + """ + + # Linearization step for ga_i * [E] + z_name = '__MUL__'.join([b.name, x.name]) + # Add the variables + model_z_u = model.add_variable(kind=LinearizationVariable, + hook=model, + id_=z_name, + lb=0, + ub=x.ub, + queue=False) + + big_m = x.ub*10 + + z_u, new_constraints = petersen_linearization(b=b, x=x, M=big_m, + z=model_z_u) + + # Add the constraints: + for cons in new_constraints: + model.add_constraint(kind=LinearizationConstraint, + hook=model, + id_=cons.name, + expr=cons.expression, + # expr=new_expression, + ub=cons.ub, + lb=cons.lb, + queue=True) + + model._push_queue() + return model_z_u + +def add_rnap_binding_equilibrium_constraints(model, the_rnap, Kb): + """ + + :param model: + :param the_rnap: + :param Kb: + :return: + """ + + # #1. Find if there is a ratio constraint on the RNAP - this is incompatible + # all_ratio_cons = model.get_constraints_of_type(EnzymeRatio) + # + # for rnap_id in model.rnap: + # try: + # model.remove_constraint(all_ratio_cons.get_by_id(rnap_id)) + # except KeyError: + # pass + + #2. Create the linearized bilinear product delta_u*RNAP_F + + RNAP_F = model.get_variables_of_type(FreeEnzyme).get_by_id(the_rnap.id).variable + activation_vars = model.get_variables_of_type(BinaryActivator) + + zs = {delta_u.name: linearize_product(model, delta_u, RNAP_F, queue=True) + for delta_u in activation_vars} + + # TODO: cleanup + dna_cons = model.interpolation_constraint.DNA_interpolation.constraint + dna_hat = {k.name:v for k,v in + dna_cons.get_linear_coefficients( + activation_vars.list_attr('variable')).items()} + + free_rnap_times_gene_conc = sum(zs[delta_u]*dna_hat[delta_u] for delta_u in zs) + + sorted_dna_conc = sorted(dna_hat.values()) + dna_hat_resolution = max([x-y for x,y in zip(sorted_dna_conc[1:], sorted_dna_conc[:-1])]) + + + all_rnap_usage = model.get_variables_of_type(RNAPUsage) + + #3. For each gene: + + for the_gene in model.genes: + if not isinstance(the_gene, ExpressedGene): + continue + + copy_number = the_gene.copy_number + + #3.1. get rnap usage + + RNAP_l = all_rnap_usage.get_by_id(the_gene.id) + + #3.2. create constraint + + expr = Kb * RNAP_l - copy_number * free_rnap_times_gene_conc + + # scaling + # expr *= model.dna.scaling_factor + + #3.3 Add it to the model + + model.add_constraint(kind=RNAPBindingEquilibrium, + hook=the_gene, + expr=expr, + ub= copy_number*dna_hat_resolution/2, + lb=-copy_number*dna_hat_resolution/2, + queue=True) + model._push_queue() + model.regenerate_constraints() + model.regenerate_variables() From 3d69706a9d5566fcc5842a37edd3970149b7da11 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:13:17 +0100 Subject: [PATCH 16/19] FIX: DNA formation export not properly handled --- etfl/io/dict.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/etfl/io/dict.py b/etfl/io/dict.py index 176bf17..1da4834 100644 --- a/etfl/io/dict.py +++ b/etfl/io/dict.py @@ -29,6 +29,7 @@ EnzymaticReaction, ProteinComplexation, DegradationReaction, \ ExpressionReaction, DNAFormation from ..core.thermomemodel import ThermoMEModel +from ..core.allocation import DNA_FORMATION_RXN_ID from ..optim.utils import rebuild_constraint, rebuild_variable from ..optim.variables import tRNAVariable, GrowthRate, FreeEnzyme from ..utils.utils import replace_by_reaction_subclass, replace_by_me_gene @@ -406,12 +407,13 @@ def _add_me_reaction_info(rxn, rxn_dict): elif isinstance(rxn, EnzymaticReaction): rxn_dict['kind'] = 'EnzymaticReaction' rxn_dict['enzymes'] = [x.id for x in rxn.enzymes] - elif isinstance(rxn, DNAFormation): - rxn_dict['kind'] = 'DNAFormation' # Generic Reaction else: rxn_dict['kind'] = 'Reaction' + if isinstance(rxn, DNAFormation): + rxn_dict['kind'] = 'DNAFormation' + def _add_thermo_reaction_info(rxn, rxn_dict): if hasattr(rxn, 'thermo'): rxn_dict['thermo'] = rxn.thermo @@ -823,7 +825,9 @@ def find_degradation_reactions_from_dict(new, obj): def find_dna_formation_reaction_from_dict(new, obj): new_rxns = list() for rxn_dict in obj['reactions']: - if rxn_dict['kind'] == 'DNAFormation': + # TODO: CLEANUP + if rxn_dict['id'] == DNA_FORMATION_RXN_ID: + # if rxn_dict['kind'] == 'DNAFormation': dna = new.dna new_rxn = replace_by_reaction_subclass(new, kind = DNAFormation, @@ -894,7 +898,7 @@ def find_genes_from_dict(new, obj): # Newer models g._rna = gene_dict['rna'] g._peptide = gene_dict['peptide'] - g._copy_number = gene_dict['copy_number'] + g._copy_number = int(gene_dict['copy_number']) g._transcribed_by = [new.enzymes.get_by_id(e) for e in gene_dict['transcribed_by']] \ if gene_dict['transcribed_by'] else None From 035d206f0df24bbe71183a2d7f55b7c294bd3474 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:15:09 +0100 Subject: [PATCH 17/19] ENH: DNA Polymerase --- etfl/data/ecoli.py | 48 ++++++++++++++++++++++++++++++++++ tutorials/helper_gen_models.py | 9 ++++--- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/etfl/data/ecoli.py b/etfl/data/ecoli.py index 8d8de4d..9ceaefb 100644 --- a/etfl/data/ecoli.py +++ b/etfl/data/ecoli.py @@ -785,6 +785,54 @@ def get_atp_synthase_coupling(atps_name): return {atps_name:[atp_synthase]} +def get_dna_polymerase(dna_pol_name='DNAPol3'): + """ + https://en.wikipedia.org/wiki/DNA_polymerase_III_holoenzyme + + The replisome is composed of the following: + + 2 DNA Pol III enzymes, each comprising α, ε and θ subunits. (It has been proven that there is a third copy of Pol III at the replisome.[1]) + the α subunit (encoded by the dnaE gene) has the polymerase activity. + the ε subunit (dnaQ) has 3'→5' exonuclease activity. + the θ subunit (holE) stimulates the ε subunit's proofreading. + 2 β units (dnaN) which act as sliding DNA clamps, they keep the polymerase bound to the DNA. + 2 τ units (dnaX) which act to dimerize two of the core enzymes (α, ε, and θ subunits). + 1 γ unit (also dnaX) which acts as a clamp loader for the lagging strand Okazaki fragments, helping the two β subunits to form a unit and bind to DNA. The γ unit is made up of 5 γ subunits which include 3 γ subunits, 1 δ subunit (holA), and 1 δ' subunit (holB). The δ is involved in copying of the lagging strand. + Χ (holC) and Ψ (holD) which form a 1:1 complex and bind to γ or τ. X can also mediate the switch from RNA primer to DNA.[2] + :return: + """ + + # DNA POLYMERASE III HOLOENZYME: Structure and Function of a Chromosomal Replicating Machine + # Annual Review of Biochemistry + # Vol. 64:171-200 (Volume publication date July 1995) + # https://doi.org/10.1146/annurev.bi.64.070195.001131 + + ktrans = 1000 * 3600 + + composition = { + # 2 DNA Pol III Enzymes + 'b0184':2, #dnaE + 'b0215':2, #dnaQ + 'b1842':2, #holE + # Beta sliding clamp units and Tau scaffold + 'b3701': 2*2, # dnaN, Beta clamps + 'b0470': 3, # dnaX, 2 tau units and 1 gamma unit + 'b0640': 1, # holA, Delta subunit + 'b1099': 1, # holB, Delta prime subunit + 'b4259': 1, # holC, Xi subunit + 'b4372': 1, # holD, Psi subunit + + } + + dna_polymerase = Enzyme(dna_pol_name, + name='DNA Polymerase 3', + kcat_fwd=ktrans, + kcat_bwd=ktrans, + kdeg=kdeg_enz, + composition=composition) + + return dna_polymerase + def get_transporters_coupling(model, additional_enz): diff --git a/tutorials/helper_gen_models.py b/tutorials/helper_gen_models.py index d494dba..3b20691 100644 --- a/tutorials/helper_gen_models.py +++ b/tutorials/helper_gen_models.py @@ -26,7 +26,7 @@ get_mrna_metrics, get_enz_metrics, \ remove_from_biomass_equation, get_ecoli_gen_stats, \ get_lloyd_coupling_dict, get_transporters_coupling, \ - get_essentials, get_average_kcat + get_essentials, get_average_kcat, get_dna_polymerase from etfl.optim.config import standard_solver_config from etfl.analysis.summary import print_standard_sol @@ -207,8 +207,8 @@ def create_model(has_thermo, has_expression, has_allocation, rna_nucleotides_mp = rna_nucleotides_mp ) ecoli.add_mrnas(mrna_dict.values()) - ecoli.add_ribosome(rib,free_ratio=free_rib_ratio) - ecoli.add_rnap(rnap, free_ratio=free_rnap_ratio) + ecoli.add_ribosome(rib, free_rib_ratio) + ecoli.add_rnap(rnap, free_rnap_ratio) ecoli.build_expression() ecoli.add_enzymatic_coupling(coupling_dict) @@ -235,6 +235,9 @@ def create_model(has_thermo, has_expression, has_allocation, chromosome_len=chromosome_len, dna_dict=dna_nucleotides) + dna_pol = get_dna_polymerase() + ecoli.add_enzymatic_coupling({'DNA_formation':[dna_pol,]}) + # Need to put after, because dummy has to be taken into account if used. ecoli.populate_expression() ecoli.add_trna_mass_balances() From a0c5bd19197fab37f8316d80fb4e73b9faefb093 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:16:18 +0100 Subject: [PATCH 18/19] ENH: Including RNAP Binding for plasmid study --- work/plasmids/main.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/work/plasmids/main.py b/work/plasmids/main.py index 856f860..cb15c48 100644 --- a/work/plasmids/main.py +++ b/work/plasmids/main.py @@ -13,6 +13,7 @@ from etfl.optim.config import standard_solver_config from etfl.analysis.summary import print_standard_sol from etfl.core.vector import TransModel +from etfl.core.equilibrium import add_rnap_binding_equilibrium_constraints import yaml import sys @@ -32,7 +33,7 @@ def write_conf(model, conf): filename = 'outputs/' + conf['tag'] + '/config.yaml' - conf['objective'] = model.objective.expression + conf['objective'] = str(model.objective.expression) with open(filename,'w') as fid: yaml.dump(config, fid) @@ -50,7 +51,7 @@ def write_conf(model, conf): standard_solver_config(model) model.solver.configuration.verbosity = config['options']['verbose'] - copy_number = config['simulation']['copy_number'] + copy_number = int(config['simulation']['copy_number']) vector_generator = vec_dict[config['simulation']['vector']] has_rnap = config['simulation']['add_rnap'] @@ -61,8 +62,26 @@ def write_conf(model, conf): # Model integration # ##################### + # 1. Adding RNAP eq constraints + # Constant for binding of RNAP to lac promoter + # Value 550 nM + # Organism Bacteria Escherichia coli + # Reference Bintu L, Buchler NE, Garcia HG, Gerland U, Hwa T, Kondev J, Phillips R. Transcriptional regulation by the numbers: models. Curr Opin Genet Dev. 2005 Apr15(2):116-24. Fig. 1PubMed ID15797194 + # Primary Source [38] Liu M, Gupte G, Roy S, Bandwar RP, Patel SS, Garges S. Kinetics of transcription initiation at lacP1. Multiple roles of cyclic AMP receptor protein. J Biol Chem. 2003 Oct 10 278(41):39755-61PubMed ID12881519 + # Method Primary source abstract: "[Researchers] examined the kinetics of open complex formation at the lacP1 promoter using tryptophan fluorescence of RNA polymerase and DNA fragments with 2-aminopurine substituted at specific positions." + # Comments P.118 caption to fig.1c: "In particular, making the simplest assumption that the genomic background for RNAP is given only by the non-specific binding of RNAP with DNA, [investigators] take K[NS]pd=10 000 nM [ref 37], for the lac promoter K[S]pd=550 nM [primary source] and for the T7 promoter, K[S]pd=3nM [BNID 103592]. For the lac promoter, this results in Δɛpd=-2.9kBT and for the T7 promoter, Δɛpd=-8.1kBT." + # BNID 103590 + # ecoli density is ~ 1.2 + # mol/L * L/kg * kg/g * g/gDW + Kb = 1e-9 * 1/1.2 * 1000 * 1/0.5 + add_rnap_binding_equilibrium_constraints(model, + the_rnap=model.rnap['rnap'], + Kb=Kb) + + transmodel = TransModel(model, inplace = config['options']['inplace']) transmodel.add_vector(my_plasmid, copy_number = copy_number) + transmodel.reactions.ALS.upper_bound = 0 transmodel.optimize() print_standard_sol(transmodel) @@ -84,4 +103,5 @@ def write_conf(model, conf): outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments) # Check antibiotic resistance is expressed ? - write_conf(transmodel, config) + #TODO: fix + # write_conf(transmodel, config) From 9d1420da587cfc686597a60c7395acc08332c389 Mon Sep 17 00:00:00 2001 From: Pierre Salvy Date: Fri, 8 Nov 2019 11:18:02 +0100 Subject: [PATCH 19/19] ENH: DNA polymerase nucleotide info --- organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv | 11 ++++++++++- organism_data/ntseq_from_kegg.py | 5 ++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv b/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv index 9d6b193..d61b892 100644 --- a/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv +++ b/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv @@ -1429,4 +1429,13 @@ b1717,atgccaaaaattaagaccgtacgcggtgctgctaagcgcttcaaaaaaaccggtaaaggtggttttaagcacaa b4506,atgaaagttcttaactctctgcgtaccgcaaaagaacgccatccagactgtcagattgtgaagcgaaaaggacggctatatgtgatttgtaaatctaatccacgttttaaggccgttcagggtcgtaagaaaaaacgttga b3299,atgaaagttcgtgcttccgtcaagaaattatgccgtaactgcaaaatcgttaagcgtgatggtgtcatccgtgtgatttgcagtgccgagccgaagcataaacagcgccaaggctga b2811,atgacaaacccgcaattcgccggacatccgttcggcacaaccgtaaccgcagaaacgttacgcaataccttcgcaccgttgacgcaatgggaagataaatatcgccagttgatcatgctggggaaacagcttccggcattgccagacgagttaaaagcgcaggctaaagagattgccggatgcgaaaaccgcgtctggctgggatatacagtggctgaaaacggcaaaatgcatttctttggcgacagcgaagggcgcattgtgcgcggcctgctggcggtgttgttgactgccgttgaggggaaaaccgccgccgagttgcaggcacagtcaccactggcattgtttgatgagctgggattacgtgcgcagcttagcgcctcacgcagccaggggttaaatgcgttaagcgaggcgattatcgctgcgacgaagcaggtttaa -b0010,atgggcaacactaagttggctaatccggcaccgctgggcctgatgggcttcggcatgaccaccattctgcttaacctgcacaacgtgggttatttcgctctggacggtattattcttgccatgggcattttctacggcggcatcgcgcaaatttttgctggtctgctggagtacaaaaaaggcaacactttcggtttaaccgcattcacctcttacggttctttctggctgacgctggttgcgattctgctgatgccgaaactgggtctgaccgatgcgccaaatgcacagttccttggtgtctacctgggtctgtggggcgtatttacgctgtttatgttcttcggcacgctgaaaggcgcacgcgttctgcaattcgttttctttagcctgaccgtgctgtttgccctgctggcgatcggtaacattgccggtaacgccgcaatcatccactttgccggctggattgggctgatctgcggtgccagcgcaatctatctggcgatgggtgaagtactgaacgagcagtttggtcgcaccgttctgccgattggtgaatcccactaa \ No newline at end of file +b0010,atgggcaacactaagttggctaatccggcaccgctgggcctgatgggcttcggcatgaccaccattctgcttaacctgcacaacgtgggttatttcgctctggacggtattattcttgccatgggcattttctacggcggcatcgcgcaaatttttgctggtctgctggagtacaaaaaaggcaacactttcggtttaaccgcattcacctcttacggttctttctggctgacgctggttgcgattctgctgatgccgaaactgggtctgaccgatgcgccaaatgcacagttccttggtgtctacctgggtctgtggggcgtatttacgctgtttatgttcttcggcacgctgaaaggcgcacgcgttctgcaattcgttttctttagcctgaccgtgctgtttgccctgctggcgatcggtaacattgccggtaacgccgcaatcatccactttgccggctggattgggctgatctgcggtgccagcgcaatctatctggcgatgggtgaagtactgaacgagcagtttggtcgcaccgttctgccgattggtgaatcccactaa +b0184,atgtctgaaccacgtttcgtacacctgcgggtgcacagcgactactcgatgatcgatggcctggccaaaaccgcaccgttggtaaaaaaggcggcggcgttgggtatgccagcactggcgatcaccgatttcaccaacctttgtggtctggtgaagttctacggagcgggacatggcgcagggattaagcctatcgtcggggcagattttaacgtccagtgcgacctgctgggtgatgagttaacccacctgacggtactggcggcgaacaataccggctatcagaatctgacgttgctgatctcaaaagcgtatcagcgcgggtacggtgccgccgggccgatcatcgatcgcgactggcttatcgaattaaacgaagggttgatccttctttccggcggacgcatgggcgacgtcggacgcagtcttttgcgtggtaacagcgcgctggtagatgagtgtgtcgcgttttatgaagaacacttcccggatcgctattttctcgagctgatccgcaccggcaggccggatgaagaaagctatctgcacgcggcggtggaactggcggaagcgcgcggtttgcccgtcgtggcgaccaacgacgtgcgctttatcgacagcagcgactttgacgcacacgaaatccgcgtcgcgatccacgacggctttaccctcgacgatcctaaacgcccgcgtaactattcgccgcagcaatatatgcgtagcgaagaggagatgtgtgagctgtttgccgacatccccgaagcccttgccaacaccgttgagatcgccaaacgctgtaacgtaaccgtgcgtcttggtgaatacttcctgccgcagttcccgaccggggacatgagcaccgaagattatctggtcaagcgtgcaaaagagggcctggaagagcgtctggcctttttattccctgatgaggaagaacgtcttaagcgccgcccggaatatgacgaacgtctggagactgaacttcaggttatcaaccagatgggcttcccgggctacttcctcatcgttatggaatttatccagtggtcgaaagataacggcgtaccggtagggccaggccgtggctccggtgcgggttcactggtggcctacgcgctgaaaatcaccgacctcgatccgctggaatttgacctgctgttcgaacgtttccttaacccggaacgtgtctccatgcctgacttcgacgttgacttctgtatggagaaacgcgatcaggttatcgagcacgtagcggacatgtacggtcgtgatgcggtatcgcagatcatcaccttcggtacaatggcggcgaaagcggtgatccgcgacgtaggccgcgtgctggggcatccgtacggctttgtcgatcgtatctcgaaactgatcccgcccgatccggggatgacgctggcgaaagcgtttgaagccgagccgcagctgccggaaatctacgaagcggatgaagaagttaaggcgctgatcgacatggcgcgcaaactggaaggggtcacccgtaacgccggtaagcacgccggtggggtggttatcgcgccgaccaaaattaccgattttgcgccgctttactgcgatgaagagggcaaacatccggtcacccagtttgataaaagcgacgttgaatacgccggactggtgaagttcgacttccttggtttgcgtacgctcaccatcatcaactgggcgctggagatgatcaacaagcggcgggcgaagaatggcgagccgccgctggatatcgctgcgatcccgctggatgataagaaaagcttcgacatgctgcaacgctcggaaaccacggcggtattccagcttgaatcgcgcggcatgaaggacctgatcaagcgtctacaacctgactgcttcgaagatatgatcgccctagtggcactgttccgccccggtccgttgcaatcagggatggtggataactttatcgaccgtaaacatggtcgtgaagagatctcctatccggacgtacagtggcagcatgaaagcctgaaaccggtactggagccaacctacggcattatcctgtatcaggaacaggtcatgcagattgcgcaggtgctttctggttataccctcggtggcgcggatatgctgcgtcgtgcgatgggtaagaaaaagccggaagagatggctaagcaacgttctgtatttgctgaaggtgcagaaaagaacggaatcaacgctgaactggcgatgaaaatcttcgacctggtggagaaattcgctggttacggatttaacaaatcgcactctgcggcctatgctttggtgtcatatcaaacgttatggctgaaagcgcactatcctgcggagtttatggcggcggtaatgaccgccgatatggacaacaccgagaaggtggtgggtctggtggatgagtgctggcggatggggctgaaaatcctgccaccagatataaactccggtctttaccatttccacgtcaacgacgacggcgaaatcgtgtatggtattggcgcgatcaaaggggtcggtgaaggtccgattgaggccatcatcgaagcccgtaataaaggcggctacttccgcgaactgtttgatctctgcgcccgtaccgacaccaaaaagttgaaccgtcgcgtgctggaaaaactgatcatgtccggggcgtttgaccgtcttgggccacatcgcgcagcgctgatgaactcgctgggcgatgcgttaaaagcggcagatcaacacgcgaaagcggaagctatcggtcaggccgatatgttcggcgtgctggccgaagagccggaacaaattgaacaatcctacgccagctgccaaccgtggccggagcaggtggtattagatggggaacgtgaaacgttaggcctgtacctgaccggacaccctatcaaccagtatttaaaagagattgagcgttatgtcggaggcgtaaggctgaaagacatgcacccgacagaacgtggtaaagtcatcacggctgcggggctcgttgttgccgcgcgggttatggtcaccaagcgcggcaatcgtatcggtatctgcacgctggatgaccgttccgggcggctggaagtgatgttgtttactgacgccctggataaataccagcaattgctggaaaaagaccgcatacttatcgtcagcggacaggtcagctttgatgacttcagcggtgggcttaaaatgaccgctcgcgaagtgatggatattgacgaagcccgggaaaaatatgctcgcgggcttgctatctcgctgacggacaggcaaattgatgaccagcttttaaaccgactccgtcagtctctggaaccccaccgctctgggacaattccagtacatctctactatcagagggcggatgcacgcgcgcggttgcgttttggcgcgacgtggcgtgtctctccgagcgatcgtttattaaacgatctccgtggcctcattggttcggagcaggtggaactggagtttgactaa +b0215,atgagcactgcaattacacgccagatcgttctcgataccgaaaccaccggtatgaaccagattggtgcgcactatgaaggccacaagatcattgagattggtgccgttgaagtggtgaaccgtcgcctgacgggcaataacttccatgtttatctcaaacccgatcggctggtggatccggaagcctttggcgtacatggtattgccgatgaatttttgctcgataagcccacgtttgccgaagtagccgatgagttcatggactatattcgcggcgcggagttggtgatccataacgcagcgttcgatatcggctttatggactacgagttttcgttgcttaagcgcgatattccgaagaccaatactttctgtaaggtcaccgatagccttgcggtggcgaggaaaatgtttcccggtaagcgcaacagcctcgatgcgttatgtgctcgctacgaaatagataacagtaaacgaacgctgcacggggcattactcgatgcccagatccttgcggaagtttatctggcgatgaccggtggtcaaacgtcgatggcttttgcgatggaaggagagacacaacagcaacaaggtgaagcaacaattcagcgcattgtacgtcaggcaagtaagttacgcgttgtttttgcgacagatgaagagattgcagctcatgaagcccgtctcgatctggtgcagaagaaaggcggaagttgcctctggcgagcataa +b1842,atgctgaagaatctggctaaactggatcaaacagaaatggataaagtgaatgtcgatttggcggcggccggggtggcatttaaagaacgctacaatatgccggtgatcgctgaagcggttgaacgtgaacagcctgaacatttgcgcagctggtttcgcgagcggcttattgcccaccgtttggcttcggtcaatctgtcacgtttaccttacgagcccaaacttaaataa +b3701,atgaaatttaccgtagaacgtgagcatttattaaaaccgctacaacaggtgagcggtccgttaggtggtcgtcctacgctaccgattctcggtaatctgctgttacaggttgctgacggtacgttgtcgctgaccggtactgatctcgagatggaaatggtggcacgtgttgcgctggttcagccacacgagccaggagcgacgaccgttccggcgcgcaaattctttgatatctgccgtggtctgcctgaaggcgcggaaattgccgtgcagctggaaggtgaacggatgctggtacgctccgggcgtagccgtttttcgctgtctaccctgccagcggcggatttcccgaacctcgatgactggcagagtgaagtcgaatttaccctgccgcaggcaacgatgaagcgtctgattgaagcgacccagttttctatggcgcatcaggacgttcgctattacttaaatggtatgctgtttgaaaccgaaggtgaagaactgcgcaccgtggcaaccgacggccaccgtctggcggtctgttcaatgccaattggtcaatctttgccaagccattcggtgatcgtaccgcgtaaaggcgtgattgaactgatgcgtatgctcgacggcggcgacaatccgctgcgcgtacagattggcagcaacaacattcgcgcccacgttggcgactttatcttcacctccaaactggtggatggtcgcttcccggattatcgccgcgttctgccgaagaacccggacaaacatctggaagctggctgcgatctgctcaagcaggcgtttgctcgcgcggcgattctctctaacgagaaattccgcggcgtacgtctttatgtcagcgaaaaccagctgaaaatcaccgccaacaacccggaacaggaagaagcggaagagatcctcgacgttacctatagcggtgcggagatggaaatcggcttcaacgtcagttatgtgctggatgttctgaacgcgctgaaatgcgaaaacgtccgcatgatgctgaccgattcggtttccagcgtgcagattgaagatgcggccagccagagcgcggcttatgttgtcatgccaatgagactgtaa +b0470,atgagttatcaggtcttagcccgaaaatggcgcccacaaacctttgctgacgtcgtcggccaggaacatgtgctgaccgcactggcgaacggcttgtcgttagggcgtattcatcatgcttatcttttttccggcacccgtggcgtcggaaaaacctctatcgcccgactgctggcgaaggggctaaactgcgaaaccggcattaccgcgacgccgtgcggcgtgtgcgataactgtcgtgaaatcgagcaggggcgctttgtcgatctgattgaaatcgacgccgcctcgcgcaccaaagttgaagatacccgcgacctgctggataacgtccagtacgctccggcgcgtggtcgtttcaaagtttatctgatcgacgaagtgcatatgctgtcgcgccacagctttaacgcactgttaaaaacccttgaagagccgccggagcacgttaagtttctgctggcgacgaccgatccacagaaattgccggtgacgattttgtcacgctgtctgcaatttcatctcaaggcgctggatgtcgagcaaattcgccatcagcttgagcacatcctcaacgaagaacatatcgctcacgagccgcgggcgctgcaattgctggcacgcgccgctgaaggcagcctgcgagatgccttaagtctgaccgaccaggcgattgccagcggtgacggccaggtttcaacccaggcggtcagtgcgatgctgggtacgcttgacgacgatcaggcgctgtcgctggttgaagcgatggtcgaggccaacggcgagcgcgtaatggcgctgattaatgaagccgctgcccgtggtatcgagtgggaagcgttgctggtggaaatgctcggcctgttgcatcgtattgcgatggtacaactttcgcctgctgcacttggcaacgacatggccgccatcgagctgcggatgcgtgaactggcgcgcaccataccgccgacggatattcagctttactatcagacgctgttgattggtcgcaaagaattaccgtatgcgccggaccgtcgcatgggcgttgagatgacgctgctgcgcgcgctggcattccatccgcgtatgccgctgcctgagccagaagtgccacgacagtcctttgcacccgtcgcgccaacggcagtaatgacgccaacccaggtgccgccgcaaccgcaatcagcgccgcagcaggcaccgactgtaccgctcccggaaaccaccagccaggtgctggcggcgcgccagcagttgcagcgcgtgcagggagcaaccaaagcaaaaaagagtgaaccggcagccgctacccgcgcgcggccggtgaataacgctgcgctggaaagactggcttcggtcaccgatcgcgttcaggcgcgtccggtgccatcggcgctggaaaaagcgccagccaaaaaagaagcgtatcgctggaaggcgaccactccggtgatgcagcaaaaagaagtggtcgccacgccgaaggcgctgaaaaaagcgctggaacatgaaaaaacgccggaactggcggcgaagctagcggcagaagccattgagcgcgacccgtgggcggcacaggtgagccaactttcgctaccaaaactggtcgaacaggtggcgttaaatgcctggaaagaggagagcgacaacgcagtatgtctgcatttgcgctcctctcagcggcatttgaacaaccgcggtgcacagcaaaaactggctgaagcgttgagcatgttaaaaggttcaacggttgaactgactatcgttgaagatgataatcccgcggtgcgtacgccgctggagtggcgtcaggcgatatacgaagaaaaacttgcgcaggcgcgcgagtccattattgcggataataatattcagaccctgcgtcggttcttcgatgcggagctggatgaagaaagtatccgccccatttga +b0640,atgattcggttgtacccggaacaactccgcgcgcagctcaatgaagggctgcgcgcggcgtatcttttacttggtaacgatcctctgttattgcaggaaagccaggacgctgttcgtcaggtagctgcggcacaaggattcgaagaacaccacactttttccattgatcccaacactgactggaatgcgatcttttcgttatgccaggctatgagtctgtttgccagtcgacaaacgctattgctgttgttaccagaaaacggaccgaatgcggcgatcaatgagcaacttctcacactcaccggacttctgcatgacgacctgctgttgatcgtccgcggtaataaattaagcaaagcgcaagaaaatgccgcctggtttactgcgcttgcgaatcgcagcgtgcaggtgacctgtcagacaccggagcaggctcagcttccccgctgggttgctgcgcgcgcaaaacagctcaacttagaactggatgacgcggcaaatcaggtgctctgctactgttatgaaggtaacctgctggcgctggctcaggcactggagcgtttatcgctgctctggccagacggcaaattgacattaccgcgcgttgaacaggcggtgaatgatgccgcgcatttcaccccttttcattgggttgatgctttgttgatgggaaaaagtaagcgcgcattgcatattcttcagcaactgcgtctggaaggcagcgaaccggttattttgttgcgcacattacaacgtgaactgttgttactggttaacctgaaacgccagtctgcccatacgccactgcgtgcgttgtttgataagcatcgggtatggcagaaccgccggggcatgatgggcgaggcgttaaatcgcttaagtcagacgcagttacgtcaggccgtgcaactcctgacacgaacggaactcaccctcaaacaagattacggtcagtcagtgtgggcagagctggaagggttatctcttctgttgtgccataaacccctggcggacgtatttatcgacggttga +b1099,atgagatggtatccatggttacgacctgatttcgaaaaactggtagccagctatcaggccggaagaggtcaccatgcgctactcattcaggcgttaccgggcatgggcgatgatgctttaatctacgccctgagccgttatttactctgccaacaaccgcagggccacaaaagttgcggtcactgtcgtggatgtcagttgatgcaggctggcacgcatcccgattactacaccctggctcccgaaaaaggaaaaaatacgctgggcgttgatgcggtacgtgaggtcaccgaaaagctgaatgagcacgcacgcttaggtggtgcgaaagtcgtttgggtaaccgatgctgccttactaaccgacgccgcggctaacgcattgctgaaaacgcttgaagagccaccagcagaaacttggtttttcctggctacccgcgagcctgaacgtttactggcaacattacgtagtcgttgtcggttacattaccttgcgccgccgccggaacagtacgccgtgacctggctttcacgcgaagtgacaatgtcacaggatgcattacttgccgcattgcgcttaagcgccggttcgcctggcgcggcactggcgttgtttcagggagataactggcaggctcgtgaaacattgtgtcaggcgttggcatatagcgtgccatcgggcgactggtattcgctgctagcggcccttaatcatgaacaagctccggcgcgtttacactggctggcaacgttgctgatggatgcgctaaaacgccatcatggtgctgcgcaggtgaccaatgttgatgtgccgggcctggtcgccgaactggcaaaccatctttctccctcgcgcctgcaggctatactgggggatgtttgccacattcgtgaacagttaatgtctgttacaggcatcaaccgcgagcttctcatcaccgatcttttactgcgtattgagcattacctgcaaccgggcgttgtgctaccggttcctcatctttaa +b4259,atgaaaaacgcgacgttctaccttctggacaatgacaccaccgtcgatggcttaagcgccgttgagcaactggtgtgtgaaattgccgcagaacgttggcgcagcggtaagcgcgtgctcatcgcctgtgaagatgaaaagcaggcttaccggctggatgaagccctgtgggcgcgtccggcagaaagctttgttccgcataatttagcgggagaaggaccgcgcggcggtgcaccggtggagatcgcctggccgcaaaagcgtagcagcagccggcgcgatatattgattagtctgcgaacaagctttgcagattttgccaccgctttcacagaagtggtagacttcgttccttatgaagattctctgaaacaactggcgcgcgaacgctataaagcctaccgcgtggctggtttcaacctgaatacggcaacctggaaataa +b4372,atgacatcccgacgagactggcagttacagcaactgggcattacccagtggtcgctgcgtcgccctggcgcgttgcagggggagattgccattgcgatcccggcacacgtccgtctggtgatggtggcaaacgatcttcccgccctgactgatcctttagtgagcgatgttctgcgcgcattaaccgtcagccccgaccaggtgctgcaactgacgccagaaaaaatcgcgatgctgccgcaaggcagtcactgcaacagttggcggttgggtactgacgaaccgctatcactggaaggcgctcaggtggcatcaccggcgctcaccgatttacgggcaaacccaacggcacgcgccgcgttatggcaacaaatttgcacatatgaacacgatttcttccctcgaaacgactga \ No newline at end of file diff --git a/organism_data/ntseq_from_kegg.py b/organism_data/ntseq_from_kegg.py index eedd8e9..841ce60 100644 --- a/organism_data/ntseq_from_kegg.py +++ b/organism_data/ntseq_from_kegg.py @@ -19,10 +19,13 @@ delimiter='\t', header=None)[0] -transporter_genes = pd.read_csv(pjoin(data_dir,'transporters_kcats.csv'), +transporter_genes = pd.read_csv(pjoin(data_folder,'transporters_kcats.csv'), header=0, skiprows=[1,], # Units row index_col=0)['gene'] +dnapol3_genes = pd.read_csv(pjoin(data_folder,'dnapol3_genes.csv'), + header=None) + all_b_genes = pd.concat([all_b_genes, rnap_genes, rrna_genes, rpeptide_genes, transporter_genes]) all_b_genes.drop_duplicates(inplace=True)