From 7cd4fa74f6b6da7a517d7023118560ee272f8a46 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 9 Oct 2019 13:37:18 +0200
Subject: [PATCH 01/19] ENH: YAML config of vector studies

---
 work/plasmids/analysis.py                    |  39 ++
 work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml |  24 +
 work/plasmids/main.py                        |  66 +++
 work/plasmids/utils.py                       |  10 +-
 work/plasmids/vector.py                      | 443 +++++++++----------
 5 files changed, 346 insertions(+), 236 deletions(-)
 create mode 100644 work/plasmids/analysis.py
 create mode 100644 work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
 create mode 100644 work/plasmids/main.py

diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py
new file mode 100644
index 0000000..a16fb53
--- /dev/null
+++ b/work/plasmids/analysis.py
@@ -0,0 +1,39 @@
+from etfl.optim.utils import check_solution
+from etfl.optim.variables import mRNAVariable, EnzymeVariable
+
+
+def chebyshev(model, chebyshev_include, inplace=True, big_m=1000):
+    from etfl.optim.variables import mRNAVariable,EnzymeVariable
+    from etfl.optim.constraints import ForwardCatalyticConstraint,BackwardCatalyticConstraint
+    from etfl.analysis.dynamic import chebyshev_center, compute_center
+
+    chebyshev_variables = model.get_variables_of_type(mRNAVariable)
+    chebyshev_variables += model.get_variables_of_type(EnzymeVariable)
+
+    chebyshev_center(model, chebyshev_variables,
+                 inplace=inplace,
+                 big_m=big_m,
+                 include=chebyshev_include)
+
+    # Revert to the previous objective, as chebyshev_center sets it to maximize
+    # the radius
+    model.objective = the_obj
+
+    # We want to compute the center under the constraint of the growth at the
+    # initial solution.
+    chebyshev_sol = compute_center(model, provided_solution=initial_solution)
+    print_standard_sol(model)
+
+
+def export_mrna(model, tag, solution=None):
+    solution = check_solution(solution)
+
+    mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('id')
+
+    data = solution.raw
+    mrna_data = data[mrna_var_ids]
+    mrna_data.name = tag
+
+    filename = 'outputs/' + tag + 'mrna' + '.csv'
+
+    mrna_data.to_csv(filename, header=True)
\ No newline at end of file
diff --git a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
new file mode 100644
index 0000000..aab6683
--- /dev/null
+++ b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
@@ -0,0 +1,24 @@
+model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191007_142013.json'
+tag: 'vEFL_BDO_1c_RNAP'
+simulation:
+  copy_number: 1
+  vector: 'plasmid_pET-AR-ALS'
+  add_rnap: yes
+assumptions:
+  growth_rate: 'auto'
+analysis:
+  # Export values ?
+  export_peptides: yes
+  export_mrna: yes
+  export_fluxes: yes
+  # Perform Chebyshev centering ?
+  chebyshev:
+    chebyshev_include:
+      - ForwardCatalyticConstraint
+      - BackwardCatalyticConstraint
+      - RNAPAllocation
+      - ExpressionCoupling
+    inplace: false
+options:
+  verbose: false
+  inplace: true
diff --git a/work/plasmids/main.py b/work/plasmids/main.py
new file mode 100644
index 0000000..aa31561
--- /dev/null
+++ b/work/plasmids/main.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+.. module:: ETFL
+   :platform: Unix, Windows
+   :synopsis: Models vectors in ETFL models
+
+.. moduleauthor:: ETFL team
+
+Model RNAP limitation in case of vector addition to a host
+"""
+
+from etfl.io.json import load_json_model
+from etfl.optim.config import standard_solver_config
+from etfl.analysis.summary import print_standard_sol
+from etfl.core.vector import TransModel
+import yaml
+import sys
+
+from utils import read_config
+from vector import get_bdo_plasmid
+
+if len(sys.argv)<=1:
+    raise ValueError('Please provide a YAML configuration file')
+else:
+    CONFIG = sys.argv[1]
+
+print('Using configuration file: {}'.format(CONFIG))
+
+vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid}
+
+if __name__ == '__main__':
+    config = read_config(CONFIG)
+
+    if config['model'] != 'debug':
+        model = load_json_model(config['model'])
+    else:
+        from etfl.tests.small_model import create_etfl_model
+        model = create_etfl_model(0,1)
+
+    standard_solver_config(model)
+    model.solver.configuration.verbosity = config['options']['verbose']
+
+    copy_number = config['simulation']['copy_number']
+    vector_generator = vec_dict[config['simulation']['vector']]
+    has_rnap = config['simulation']['add_rnap']
+
+    my_plasmid = vector_generator(model, has_rnap)
+
+
+    #####################
+    # Model integration #
+    #####################
+
+    transmodel = TransModel(model, inplace = config['options']['inplace'])
+    transmodel.add_vector(my_plasmid, copy_number = copy_number)
+
+    transmodel.optimize()
+    print_standard_sol(transmodel)
+
+    for analysis,arguments in config['analysis']:
+        if arguments != False:
+            arguments['model'] = transmodel
+            arguments['tag'] = config['tag']
+            analysis(**arguments)
+
+        # Check antibiotic resistance is expressed ?
\ No newline at end of file
diff --git a/work/plasmids/utils.py b/work/plasmids/utils.py
index df40aef..625596f 100644
--- a/work/plasmids/utils.py
+++ b/work/plasmids/utils.py
@@ -9,6 +9,7 @@
 Utility functions
 """
 
+import yaml
 
 def read_seq(filename):
     with open(filename,'r') as fid:
@@ -17,4 +18,11 @@ def read_seq(filename):
     seq = ''.join([x for line in all_lines for x in line.split()
                    if not x.isdigit()])
 
-    return seq
\ No newline at end of file
+    return seq
+
+def read_config(yaml_file):
+    with open(yaml_file, 'rb') as f:
+        conf = yaml.load(f.read(), Loader=yaml.SafeLoader)  # load the config file
+    return conf
+
+
diff --git a/work/plasmids/vector.py b/work/plasmids/vector.py
index 6f65b29..70fa388 100644
--- a/work/plasmids/vector.py
+++ b/work/plasmids/vector.py
@@ -1,242 +1,215 @@
-# -*- coding: utf-8 -*-
-"""
-.. module:: ETFL
-   :platform: Unix, Windows
-   :synopsis: Models vectors in ETFL models
-
-.. moduleauthor:: ETFL team
-
-Model RNAP limitation in case of vector addition to a host
-"""
-
-from etfl.io.json import load_json_model
-from etfl.core.vector import TransModel, Plasmid
+from etfl.data.ecoli import kdeg_enz, kdeg_mrna, get_average_kcat, get_rnap, \
+    get_nt_sequences
 from etfl.core.genes import ExpressedGene
 from etfl.core.rna import mRNA
 from etfl.core.enzyme import Enzyme
-from etfl.tests.small_model import create_etfl_model
-from etfl.data.ecoli import kdeg_enz, kdeg_mrna, get_average_kcat, get_rnap, \
-    get_nt_sequences
-from etfl.analysis.summary import print_standard_sol
+from etfl.core.vector import TransModel, Plasmid
+
+from utils import read_seq
 
 from cobra.core import Metabolite, Reaction
 
-from utils import read_seq
 
-# E. coli
-
-model = load_json_model('../../tutorials/models/'
-                        'iJO1366_vEFL_v_0.10_431_enz_128_bins__20191003_100718.json')
-
-# model = create_etfl_model(0,1)
-# model = create_etfl_model(0,0)
-
-# Plasmid pZS*-13S-ald-adh
-# US Patent 20,140,371,417.
-# Used in Andreozzi, Stefano, et al.
-# "Identification of metabolic engineering targets for the enhancement of
-# 1,4-butanediol production in recombinant E. coli using large-scale
-# kinetic models."
-# Metabolic engineering 35 (2016): 148-159.
-
-# (too complicated)
-
-# 2-gene BDO plasmid
-# Reshamwala, Shamlan MS, Shalini S. Deb, and Arvind M. Lali.
-# "A shortened, two-enzyme pathway for 2, 3-butanediol production in
-# Escherichia coli."
-# Journal of industrial microbiology & biotechnology 44.9 (2017): 1273-1277.
-
-###############
-# Metabolites #
-###############
-acetoin     = Metabolite(id='acetoin_c', name = 'Acetoin, cytosol',
-                         formula = 'C4H8O2', compartment='c')
-diacetyl    = Metabolite(id='diacetyl_c', name = 'Diacetyl, cytosol',
-                         formula = 'C4H6O2', compartment='c')
-bdo         = Metabolite(id='bdo_c', name='Meso-2,3-butanediol, cytosol',
-                         formula = 'C4H10O2', compartment='c')
-bdo_e       = Metabolite(id='bdo_e', name='Meso-2,3-butanediol, extracellular',
-                         formula = 'C4H10O2', compartment='e')
-acetolactate= model.metabolites.alac__S_c
-nadh        = model.metabolites.nadh_c
-nad         = model.metabolites.nad_c
-pyr         = model.metabolites.pyr_c
-co2         = model.metabolites.co2_c
-h           = model.metabolites.h_c
-#############
-# Reactions #
-#############
-
-# ALS: 2 pyr + h => alac + co2
-als = Reaction(id = 'ALS', name = 'Acetolactate synthase [Plasmid](Enterobacter)')
-als.add_metabolites({
-    pyr:-2,
-    h:-1,
-    acetolactate:1,
-    co2:1
-})
-
-# SALD: alac => diacetyl + co2 (spontaneous)
-sald = Reaction(id = 'SALD', name = 'Spontaneous acetolactate decarboxylation [Plasmid]')
-sald.add_metabolites({
-    acetolactate:-1,
-    diacetyl:1,
-    co2:1
-})
-
-# AR: diacetyl + NADH => acetoin + NAD+#############
-
-ar = Reaction(id = 'AR', name = 'Acetoin oxidoreductase (NADH) [Plasmid]')
-ar.add_metabolites({
-    diacetyl:-1,
-    nadh:-1,
-    h:-1,
-    acetoin:1,
-    nad:1
-})
-
-# BDH: acetoin + NADH => bdo + NAD+
-bdh = Reaction(id = 'BDH', name = 'Butanediol dehydrogenase [Plasmid]')
-bdh.add_metabolites({
-    acetoin:-1,
-    nadh:-1,
-    h:-1,
-    bdo:1,
-    nad:1
-})
-
-# BDO Transport
-bdo_tp = Reaction(id='BDOtp', name = 'BDO Transport (extracellular)')
-bdo_tp.add_metabolites({
-    bdo:-1,
-    bdo_e:1
-})
-
-# BDO Exchange
-bdo_ex = Reaction(id='EX_BDO', name = 'BDO Exchange')
-bdo_ex.add_metabolites({
-    bdo_e:-1,
-})
-
-reaction_list = [als, sald, ar, bdh, bdo_tp, bdo_ex]
-
-#############
-#   Genes   #
-#############
-
-ALS = ExpressedGene(id = 'EBA_als',
-                    name = 'acetolactate synthase',
-                    sequence = read_seq('data/ALS.txt'))
-AR = ExpressedGene (id = 'EBA_ar',
-                    name = 'acetoin reductase',
-                    sequence = read_seq('data/AR.txt'))
-fwd_als= 'GGAATTCCATATGAACAGTGAGAAACAGTC'
-rev_als= 'CCGAGCTCTTACAAAATCTGGCTGAGAT'
-fwd_ar = 'GGAATTCCATATGCAAAAAGTTGCTCTCGTAAC'
-rev_ar = 'CCGAGCTCTTAGTTGAACACCATCCCAC'
-
-pET = read_seq('data/pET.txt')
-
-#############
-#  Enzymes  #
-#############
-
-# ALS is 60 kDa, and is a homotrimer
-# Kaushal, Anju, Sunil Pabbi, and Prince Sharma.
-# "Characterization of 2, 3-butanediol-forming and valine-sensitive
-# α-acetolactate synthase of Enterobacter cloacae."
-# World Journal of Microbiology and Biotechnology 19.5 (2003): 487-493.
-
-# ALS kcat is 26.9 +- 2.5[s-1]
-# Choi, Myeong-Hyeon, et al.
-# "Molecular cloning and expression of Enterobacter aerogenes α-acetolactate
-# decarboxylase in pyruvate decarboxylase-deficient Saccharomyces cerevisiae
-# for efficient 2, 3-butanediol production."
-# Process biochemistry 51.2 (2016): 170-176.
-
-ALS_enz = Enzyme(
-    id = 'ALS',
-    kcat=26.9*3600,
-    kdeg=kdeg_enz,
-    composition = {'EBA_als':3}
-)
-
-# The third enzyme in the 2,3-butanediol pathway, AR, purified from A.aerogenes
-# consists of four equal-size sub-unitsof 25kDa(40).
-
-# Blomqvist, Kristina, et al.
-# "Characterization of the genes of the 2, 3-butanediol operons from Klebsiella
-# terrigena and Enterobacter aerogenes."
-# Journal of bacteriology 175.5 (1993): 1392-1404.
-# &
-# Stormer, Fredrik C.
-# "[106] 2, 3-Butanediol biosynthetic system in Aerobacter aerogenes."
-# Methods in enzymology. Vol. 41. Academic Press, 1975. 518-533.
-
-AR_enz = Enzyme(
-    id = 'AR',
-    kcat=get_average_kcat(),
-    kdeg=kdeg_enz,
-    composition = {'EBA_ar':4}
-)
-
-# Probably the catalytic activities are different
-BDH_enz = Enzyme(
-    id = 'BDH',
-    kcat=get_average_kcat(),
-    kdeg=kdeg_enz,
-    composition = {'EBA_ar':4}
-)
-
-############
-# Coupling #
-############
-
-coupling_dict = {'ALS':[ALS_enz], 'AR':[AR_enz], 'BDH':[BDH_enz]}
-
-# AR and BDH are catalyzed by the same enzyme
-
-###########
-# Plasmid #
-###########
-
-rnap = get_rnap()
-rnap.id = 'plasmid_rnap'
-
-nt_seq_ecoli = get_nt_sequences()
-rnap_genes = list()
-for g in rnap.composition:
-    this_gene = ExpressedGene(id='PLASMID_'+g,
-                              name='{}, Plasmid',
-                              sequence = nt_seq_ecoli[g])
-    rnap_genes.append(this_gene)
-
-gene_list = [ALS,AR] + rnap_genes
-
-plasmid_seq = pET \
-              + fwd_als + ALS.sequence + rev_als \
-              + fwd_ar + AR.sequence + rev_ar \
-              + ''.join(str(g.sequence) for g in rnap_genes)
-
-my_plasmid = Plasmid(id_ = 'pET-AR-ALS',
-                     sequence = plasmid_seq,
-                     genes = gene_list,
-                     reactions = reaction_list,
-                     coupling_dict = coupling_dict,
-                     rnap = rnap)
-my_plasmid.build_default_mrna(kdeg_mrna)
-
-#####################
-# Model integration #
-#####################
-
-transmodel = TransModel(model, inplace = True)
-transmodel.add_vector(my_plasmid, copy_number = 5)
-
-
-# Check antibiotic resistance is expressed ?
-
-
-transmodel.optimize()
-print_standard_sol(transmodel)
\ No newline at end of file
+def get_bdo_plasmid(model, has_rnap=False):
+
+    # Plasmid pZS*-13S-ald-adh
+    # US Patent 20,140,371,417.
+    # Used in Andreozzi, Stefano, et al.
+    # "Identification of metabolic engineering targets for the enhancement of
+    # 1,4-butanediol production in recombinant E. coli using large-scale
+    # kinetic models."
+    # Metabolic engineering 35 (2016): 148-159.
+
+    # (too complicated)
+
+    # 2-gene BDO plasmid
+    # Reshamwala, Shamlan MS, Shalini S. Deb, and Arvind M. Lali.
+    # "A shortened, two-enzyme pathway for 2, 3-butanediol production in
+    # Escherichia coli."
+    # Journal of industrial microbiology & biotechnology 44.9 (2017): 1273-1277.
+
+    ###############
+    # Metabolites #
+    ###############
+    acetoin     = Metabolite(id='acetoin_c', name = 'Acetoin, cytosol',
+                             formula = 'C4H8O2', compartment='c')
+    diacetyl    = Metabolite(id='diacetyl_c', name = 'Diacetyl, cytosol',
+                             formula = 'C4H6O2', compartment='c')
+    bdo         = Metabolite(id='bdo_c', name='Meso-2,3-butanediol, cytosol',
+                             formula = 'C4H10O2', compartment='c')
+    bdo_e       = Metabolite(id='bdo_e', name='Meso-2,3-butanediol, extracellular',
+                             formula = 'C4H10O2', compartment='e')
+    acetolactate= model.metabolites.alac__S_c
+    nadh        = model.metabolites.nadh_c
+    nad         = model.metabolites.nad_c
+    pyr         = model.metabolites.pyr_c
+    co2         = model.metabolites.co2_c
+    h           = model.metabolites.h_c
+    #############
+    # Reactions #
+    #############
+
+    # ALS: 2 pyr + h => alac + co2
+    als = Reaction(id = 'ALS', name = 'Acetolactate synthase [Plasmid](Enterobacter)')
+    als.add_metabolites({
+        pyr:-2,
+        h:-1,
+        acetolactate:1,
+        co2:1
+    })
+
+    # SALD: alac => diacetyl + co2 (spontaneous)
+    sald = Reaction(id = 'SALD', name = 'Spontaneous acetolactate decarboxylation [Plasmid]')
+    sald.add_metabolites({
+        acetolactate:-1,
+        diacetyl:1,
+        co2:1
+    })
+
+    # AR: diacetyl + NADH => acetoin + NAD+#############
+
+    ar = Reaction(id = 'AR', name = 'Acetoin oxidoreductase (NADH) [Plasmid]')
+    ar.add_metabolites({
+        diacetyl:-1,
+        nadh:-1,
+        h:-1,
+        acetoin:1,
+        nad:1
+    })
+
+    # BDH: acetoin + NADH => bdo + NAD+
+    bdh = Reaction(id = 'BDH', name = 'Butanediol dehydrogenase [Plasmid]')
+    bdh.add_metabolites({
+        acetoin:-1,
+        nadh:-1,
+        h:-1,
+        bdo:1,
+        nad:1
+    })
+
+    # BDO Transport
+    bdo_tp = Reaction(id='BDOtp', name = 'BDO Transport (extracellular)')
+    bdo_tp.add_metabolites({
+        bdo:-1,
+        bdo_e:1
+    })
+
+    # BDO Exchange
+    bdo_ex = Reaction(id='EX_BDO', name = 'BDO Exchange')
+    bdo_ex.add_metabolites({
+        bdo_e:-1,
+    })
+
+    reaction_list = [als, sald, ar, bdh, bdo_tp, bdo_ex]
+
+    #############
+    #   Genes   #
+    #############
+
+    ALS = ExpressedGene(id = 'EBA_als',
+                        name = 'acetolactate synthase',
+                        sequence = read_seq('data/ALS.txt'))
+    AR = ExpressedGene (id = 'EBA_ar',
+                        name = 'acetoin reductase',
+                        sequence = read_seq('data/AR.txt'))
+    fwd_als= 'GGAATTCCATATGAACAGTGAGAAACAGTC'
+    rev_als= 'CCGAGCTCTTACAAAATCTGGCTGAGAT'
+    fwd_ar = 'GGAATTCCATATGCAAAAAGTTGCTCTCGTAAC'
+    rev_ar = 'CCGAGCTCTTAGTTGAACACCATCCCAC'
+
+    pET = read_seq('data/pET.txt')
+
+    #############
+    #  Enzymes  #
+    #############
+
+    # ALS is 60 kDa, and is a homotrimer
+    # Kaushal, Anju, Sunil Pabbi, and Prince Sharma.
+    # "Characterization of 2, 3-butanediol-forming and valine-sensitive
+    # α-acetolactate synthase of Enterobacter cloacae."
+    # World Journal of Microbiology and Biotechnology 19.5 (2003): 487-493.
+
+    # ALS kcat is 26.9 +- 2.5[s-1]
+    # Choi, Myeong-Hyeon, et al.
+    # "Molecular cloning and expression of Enterobacter aerogenes α-acetolactate
+    # decarboxylase in pyruvate decarboxylase-deficient Saccharomyces cerevisiae
+    # for efficient 2, 3-butanediol production."
+    # Process biochemistry 51.2 (2016): 170-176.
+
+    ALS_enz = Enzyme(
+        id = 'ALS',
+        kcat=26.9*3600,
+        kdeg=kdeg_enz,
+        composition = {'EBA_als':3}
+    )
+
+    # The third enzyme in the 2,3-butanediol pathway, AR, purified from A.aerogenes
+    # consists of four equal-size sub-unitsof 25kDa(40).
+
+    # Blomqvist, Kristina, et al.
+    # "Characterization of the genes of the 2, 3-butanediol operons from Klebsiella
+    # terrigena and Enterobacter aerogenes."
+    # Journal of bacteriology 175.5 (1993): 1392-1404.
+    # &
+    # Stormer, Fredrik C.
+    # "[106] 2, 3-Butanediol biosynthetic system in Aerobacter aerogenes."
+    # Methods in enzymology. Vol. 41. Academic Press, 1975. 518-533.
+
+    AR_enz = Enzyme(
+        id = 'AR',
+        kcat=get_average_kcat(),
+        kdeg=kdeg_enz,
+        composition = {'EBA_ar':4}
+    )
+
+    # Probably the catalytic activities are different
+    BDH_enz = Enzyme(
+        id = 'BDH',
+        kcat=get_average_kcat(),
+        kdeg=kdeg_enz,
+        composition = {'EBA_ar':4}
+    )
+
+    ############
+    # Coupling #
+    ############
+
+    coupling_dict = {'ALS':[ALS_enz], 'AR':[AR_enz], 'BDH':[BDH_enz]}
+
+    # AR and BDH are catalyzed by the same enzyme
+
+    ###########
+    # Plasmid #
+    ###########
+
+    if has_rnap:
+        rnap = get_rnap()
+        rnap.id = 'plasmid_rnap'
+
+        nt_seq_ecoli = get_nt_sequences()
+        rnap_genes = list()
+        for g in rnap.composition:
+            this_gene = ExpressedGene(id='PLASMID_'+g,
+                                      name='{}, Plasmid',
+                                      sequence = nt_seq_ecoli[g])
+            rnap_genes.append(this_gene)
+    else:
+        rnap_genes = []
+        rnap = None
+
+    gene_list = [ALS,AR] + rnap_genes
+
+    plasmid_seq = pET \
+                  + fwd_als + ALS.sequence + rev_als \
+                  + fwd_ar + AR.sequence + rev_ar \
+                  + ''.join(str(g.sequence) for g in rnap_genes)
+
+    my_plasmid = Plasmid(id_ = 'pET-AR-ALS',
+                         sequence = plasmid_seq,
+                         genes = gene_list,
+                         reactions = reaction_list,
+                         coupling_dict = coupling_dict,
+                         rnap = rnap)
+    my_plasmid.build_default_mrna(kdeg_mrna)
+
+    return my_plasmid
\ No newline at end of file

From 76ed38e265ee2dc527550daf7119b4448050c63c Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 16 Oct 2019 15:59:06 +0200
Subject: [PATCH 02/19] FIX: give a sequence to the dummy gene, so that
 gene.sequence has a non-0 length

---
 etfl/core/memodel.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py
index d596be5..e752557 100644
--- a/etfl/core/memodel.py
+++ b/etfl/core/memodel.py
@@ -301,10 +301,11 @@ def add_dummies(self, nt_ratios, mrna_kdeg, mrna_length, aa_ratios,
         add_interpolation_variables(self)
 
         # Create a dummy gene and override the sequences with input data
+        dummy_sequence = 'N'*mrna_length
         dummy_gene = ExpressedGene(id='dummy_gene',
                                    name='Dummy Gene',
-                                   sequence='')
-        dummy_gene._rna = 'N'*mrna_length
+                                   sequence=dummy_sequence)
+        dummy_gene._rna = dummy_sequence
         dummy_gene._peptide = 'X'*peptide_length
 
         self.add_genes([dummy_gene])

From eaf831bead82fd5919d10708fbc2e6d6a1cecccf Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 16 Oct 2019 16:00:04 +0200
Subject: [PATCH 03/19] FIX: return name rather than constraint object in
 check_binding_constraints

---
 etfl/optim/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etfl/optim/utils.py b/etfl/optim/utils.py
index 8f881a1..acf43c3 100644
--- a/etfl/optim/utils.py
+++ b/etfl/optim/utils.py
@@ -456,7 +456,7 @@ def safe_optim(model):
 def get_binding_constraints(model, epsilon):
 
     if model.problem.__name__ == 'optlang.gurobi_interface':
-        return {kind:[c
+        return {kind:[c.name
                                for c in these_cons
                                if c.constraint._internal_constraint.Slack <= epsilon]
                 for kind,these_cons in model._cons_kinds.items()}

From 850c12846fa84eb8ac72075eabc76f27d3a87a2c Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 16 Oct 2019 16:01:08 +0200
Subject: [PATCH 04/19] ENH: add option to revert model after chebyshev center
 computation, or not. Useful for getting primal, slack, etc from the model, as
 they are erased if the model is modified

---
 etfl/analysis/dynamic.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/etfl/analysis/dynamic.py b/etfl/analysis/dynamic.py
index ad34f9a..0c5dfc7 100644
--- a/etfl/analysis/dynamic.py
+++ b/etfl/analysis/dynamic.py
@@ -361,7 +361,7 @@ def update_medium(t, Xi, Si, dmodel, medium_fun, timestep):
 
     return X,S
 
-def compute_center(dmodel, provided_solution=None):
+def compute_center(dmodel, provided_solution=None, revert_changes=True):
     """
     Fixes growth to be above computed lower bound, finds chebyshev center,
     resets the model, returns solution data
@@ -403,8 +403,9 @@ def compute_center(dmodel, provided_solution=None):
         dmodel.optimize()
         chebyshev_sol = dmodel.solution
 
-    dmodel.growth_reaction.lower_bound = prev_lb
-    dmodel.objective = prev_obj
+    if revert_changes:
+        dmodel.growth_reaction.lower_bound = prev_lb
+        dmodel.objective = prev_obj
     return chebyshev_sol
 
 

From d1ce0e61cc7cc2733e1ae8ca7b056fff1a505b1c Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 16 Oct 2019 16:02:15 +0200
Subject: [PATCH 05/19] ENH: add a debug plasmid

---
 work/plasmids/debug.yaml | 26 +++++++++++++++++
 work/plasmids/vector.py  | 62 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 work/plasmids/debug.yaml

diff --git a/work/plasmids/debug.yaml b/work/plasmids/debug.yaml
new file mode 100644
index 0000000..90cbfca
--- /dev/null
+++ b/work/plasmids/debug.yaml
@@ -0,0 +1,26 @@
+model: 'debug'
+tag: 'debug'
+simulation:
+  copy_number: 1
+  vector: 'debug'
+  add_rnap: yes
+assumptions:
+  growth_rate: 'auto'
+analysis:
+  # Export values ?
+  export_peptides: yes
+  export_mrna: yes
+  export_fluxes: yes
+  binding_constraints:
+    epsilon: 1.0e-8
+  # Perform Chebyshev centering ?
+  # chebyshev:
+  #   chebyshev_include:
+  #     - ForwardCatalyticConstraint
+  #     - BackwardCatalyticConstraint
+  #     - RNAPAllocation
+  #     - ExpressionCoupling
+  #   inplace: false
+options:
+  verbose: false
+  inplace: true
diff --git a/work/plasmids/vector.py b/work/plasmids/vector.py
index 70fa388..979eb89 100644
--- a/work/plasmids/vector.py
+++ b/work/plasmids/vector.py
@@ -9,6 +9,66 @@
 
 from cobra.core import Metabolite, Reaction
 
+def get_debug_plasmid(model, has_rnap=False):
+
+    h2o_c = model.metabolites.h2o_c
+    h2o_e = model.metabolites.h2o_e
+
+    # CO2 Exchange
+    h2o_tpp = Reaction(id='h2otpp', name='H2O Exchange')
+    h2o_tpp.add_metabolites({
+        h2o_c:  1,
+        h2o_e: -1,
+    })
+
+    reaction_list = [h2o_tpp]
+
+    #############
+    #   Genes   #
+    #############
+
+    DBG = ExpressedGene(id='DBG_gene',
+                        name='Debug synthase',
+                        sequence='AATTTCGGTTGA'.lower())
+
+    #############
+    #  Enzymes  #
+    #############
+
+    DBG_enz = Enzyme(
+        id='DBG',
+        kcat=65 * 3600,
+        kdeg=kdeg_enz,
+        composition={'DBG_gene': 3}
+    )
+
+    ############
+    # Coupling #
+    ############
+
+    coupling_dict = {'h2otpp': [DBG_enz]}
+
+    ###########
+    # Plasmid #
+    ###########
+
+    if 1:
+        rnap_genes = []
+        rnap = None
+
+    gene_list = [DBG] + rnap_genes
+
+    plasmid_seq = DBG.sequence
+
+    my_plasmid = Plasmid(id_='debug',
+                         sequence=plasmid_seq,
+                         genes=gene_list,
+                         reactions=reaction_list,
+                         coupling_dict=coupling_dict,
+                         rnap=rnap)
+    my_plasmid.build_default_mrna(kdeg_mrna)
+
+    return my_plasmid
 
 def get_bdo_plasmid(model, has_rnap=False):
 
@@ -212,4 +272,4 @@ def get_bdo_plasmid(model, has_rnap=False):
                          rnap = rnap)
     my_plasmid.build_default_mrna(kdeg_mrna)
 
-    return my_plasmid
\ No newline at end of file
+    return my_plasmid

From 1d75ea611e1b3b50e70aa489a531f38b7c143cc4 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Wed, 16 Oct 2019 16:04:31 +0200
Subject: [PATCH 06/19] ENH: Wrapper for performing plasmid integration via 
 YAML-spec

---
 work/plasmids/analysis.py | 73 ++++++++++++++++++++++++++++++++++++---
 work/plasmids/main.py     | 22 +++++++++---
 2 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py
index a16fb53..8861c5b 100644
--- a/work/plasmids/analysis.py
+++ b/work/plasmids/analysis.py
@@ -1,5 +1,13 @@
 from etfl.optim.utils import check_solution
 from etfl.optim.variables import mRNAVariable, EnzymeVariable
+from etfl.analysis.utils import enzymes_to_peptides_conc
+from etfl.analysis.summary import print_standard_sol
+
+from pytfa.io.viz import export_reactions_for_escher
+
+import pandas as pd
+
+from collections import OrderedDict
 
 
 def chebyshev(model, chebyshev_include, inplace=True, big_m=1000):
@@ -10,6 +18,10 @@ def chebyshev(model, chebyshev_include, inplace=True, big_m=1000):
     chebyshev_variables = model.get_variables_of_type(mRNAVariable)
     chebyshev_variables += model.get_variables_of_type(EnzymeVariable)
 
+    initial_solution = model.optimize()
+
+    the_obj = model.objective
+
     chebyshev_center(model, chebyshev_variables,
                  inplace=inplace,
                  big_m=big_m,
@@ -21,19 +33,70 @@ def chebyshev(model, chebyshev_include, inplace=True, big_m=1000):
 
     # We want to compute the center under the constraint of the growth at the
     # initial solution.
-    chebyshev_sol = compute_center(model, provided_solution=initial_solution)
+    chebyshev_sol = compute_center(model, provided_solution=initial_solution,
+                                   revert_changes=False)
     print_standard_sol(model)
+    return chebyshev_sol
 
 
 def export_mrna(model, tag, solution=None):
-    solution = check_solution(solution)
+    solution = check_solution(model, solution)
 
-    mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('id')
+    mrna_var_ids = model.get_variables_of_type(mRNAVariable).list_attr('name')
 
     data = solution.raw
     mrna_data = data[mrna_var_ids]
     mrna_data.name = tag
 
-    filename = 'outputs/' + tag + 'mrna' + '.csv'
+    filename = 'outputs/' + tag + '_mrna' + '.csv'
+
+    mrna_data.to_csv(filename, header=True)
+    return mrna_data
+
+def export_peptides(model, tag, solution=None):
+    solution = check_solution(model, solution)
+
+    enz_var_ids = model.get_variables_of_type(EnzymeVariable).list_attr('name')
+
+    data = solution.raw
+    enz_data = data[enz_var_ids]
+    enz_data.name = tag
+
+    pep_data = enzymes_to_peptides_conc(model=model, enzyme_conc=enz_data)
+    pep_data = pd.DataFrame.from_dict(pep_data, orient='index')
+
+    filename = 'outputs/' + tag + '_pep' + '.csv'
+
+    pep_data.to_csv(filename, header=True)
+    return pep_data
+
+
+def export_fluxes(model, tag, solution=None):
+    solution = check_solution(model, solution)
+
+    filename = 'outputs/' + tag + '_rxn' + '.csv'
+
+    export_reactions_for_escher(model, solution.raw, filename)
+    return solution.fluxes
+
+
+def binding_constraints(model, tag, epsilon, solution=None):
+    from etfl.optim.utils import get_binding_constraints
+    # solution = check_solution(model, solution)
+    model.optimize()
+
+    filename = 'outputs/' + tag + '_binding_cons' + '.csv'
+
+    binding_constraints = get_binding_constraints(model, epsilon)
+
+    name2bind = OrderedDict()
+
+    for cons_kind, cons_names in binding_constraints.items():
+        for the_name in cons_names:
+            name2bind[the_name] = cons_kind
+
+    df = pd.DataFrame.from_dict(name2bind, orient='index')
+    df.columns = ['name','type']
 
-    mrna_data.to_csv(filename, header=True)
\ No newline at end of file
+    df.to_csv(filename, header=True)
+    return df
diff --git a/work/plasmids/main.py b/work/plasmids/main.py
index aa31561..c0196e6 100644
--- a/work/plasmids/main.py
+++ b/work/plasmids/main.py
@@ -17,7 +17,8 @@
 import sys
 
 from utils import read_config
-from vector import get_bdo_plasmid
+from vector import get_bdo_plasmid, get_debug_plasmid
+import analysis
 
 if len(sys.argv)<=1:
     raise ValueError('Please provide a YAML configuration file')
@@ -26,7 +27,8 @@
 
 print('Using configuration file: {}'.format(CONFIG))
 
-vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid}
+vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid,
+          'debug':get_debug_plasmid}
 
 if __name__ == '__main__':
     config = read_config(CONFIG)
@@ -57,10 +59,20 @@
     transmodel.optimize()
     print_standard_sol(transmodel)
 
-    for analysis,arguments in config['analysis']:
+    outputs = dict()
+
+    if 'chebyshev' in config['analysis']:
+        arguments = config['analysis'].pop('chebyshev')
+        arguments['model'] = transmodel
+        # arguments['tag'] = config['tag']
+        outputs['chebyshev'] = analysis.chebyshev(**arguments)
+
+    for analysis_str,arguments in config['analysis'].items():
         if arguments != False:
+            if arguments == True:
+                arguments = dict()
             arguments['model'] = transmodel
             arguments['tag'] = config['tag']
-            analysis(**arguments)
+            outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments)
 
-        # Check antibiotic resistance is expressed ?
\ No newline at end of file
+        # Check antibiotic resistance is expressed ?

From d8a453241a60d9106f3c8efb7f8294f9643ac220 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 25 Oct 2019 09:28:18 +0200
Subject: [PATCH 07/19] ENH: added objective argument to compute center in case
 of non-single radius

---
 etfl/analysis/dynamic.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/etfl/analysis/dynamic.py b/etfl/analysis/dynamic.py
index 0c5dfc7..4f90db0 100644
--- a/etfl/analysis/dynamic.py
+++ b/etfl/analysis/dynamic.py
@@ -361,12 +361,14 @@ def update_medium(t, Xi, Si, dmodel, medium_fun, timestep):
 
     return X,S
 
-def compute_center(dmodel, provided_solution=None, revert_changes=True):
+def compute_center(dmodel, objective,
+                   provided_solution=None, revert_changes=True):
     """
     Fixes growth to be above computed lower bound, finds chebyshev center,
     resets the model, returns solution data
 
     :param dmodel:
+    :param objective: the radius to maximize
     :return:
     """
 
@@ -386,12 +388,13 @@ def compute_center(dmodel, provided_solution=None, revert_changes=True):
 
     fix_growth(dmodel, the_solution)
 
+    dmodel.objective = objective #dmodel.chebyshev_radius.radius.variable
     try:
-        dmodel.objective = dmodel.chebyshev_radius.radius.variable
         dmodel.optimize()
         chebyshev_sol = dmodel.solution
 
-        release_growth(dmodel)
+        if revert_changes:
+            release_growth(dmodel)
     except AttributeError: #does not solve
         dmodel.logger.warning('Chebyshev solution at fixed growth solution '
                               'not found - relaxing integers and solving '
@@ -475,7 +478,9 @@ def run_dynamic_etfl(model, timestep, tfinal, uptake_fun, medium_fun,
 
     # We want to compute the center under the constraint of the growth at the
     # initial solution.
-    chebyshev_sol = compute_center(dmodel,provided_solution=initial_solution)
+    chebyshev_sol = compute_center(dmodel,
+                                   objective = dmodel.chebyshev_radius.radius.variable,
+                                   provided_solution=initial_solution)
 
     # Only now do we add the dynamic variable constraints.
     add_dynamic_variables_constraints(dmodel, timestep, dynamic_constraints)
@@ -562,4 +567,4 @@ def run_dynamic_etfl(model, timestep, tfinal, uptake_fun, medium_fun,
     return wrap_time_sol(var_solutions, obs_values)
 
 def wrap_time_sol(var_solutions, obs_values):
-    return pd.concat([var_solutions,obs_values], axis=0)
\ No newline at end of file
+    return pd.concat([var_solutions,obs_values], axis=0)

From af3012ab0512ece9aaf76d48e7733770fe254794 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 25 Oct 2019 09:28:42 +0200
Subject: [PATCH 08/19] MNT: Cleanup

---
 etfl/optim/utils.py       | 4 +++-
 etfl/tests/small_model.py | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/etfl/optim/utils.py b/etfl/optim/utils.py
index acf43c3..926e228 100644
--- a/etfl/optim/utils.py
+++ b/etfl/optim/utils.py
@@ -455,8 +455,10 @@ def safe_optim(model):
 
 def get_binding_constraints(model, epsilon):
 
-    if model.problem.__name__ == 'optlang.gurobi_interface':
+    if is_gurobi(model):
         return {kind:[c.name
                                for c in these_cons
                                if c.constraint._internal_constraint.Slack <= epsilon]
                 for kind,these_cons in model._cons_kinds.items()}
+    else:
+        raise(NotImplementedError)
diff --git a/etfl/tests/small_model.py b/etfl/tests/small_model.py
index 7e4d47a..0a4d51e 100644
--- a/etfl/tests/small_model.py
+++ b/etfl/tests/small_model.py
@@ -38,11 +38,11 @@
 GUROBI = 'optlang-gurobi'
 GLPK = 'optlang-glpk'
 
-solver = GLPK
+DEFAULT_SOLVER = GLPK
 aa_dict, rna_nucleotides, rna_nucleotides_mp, dna_nucleotides = get_monomers_dict()
 essentials = get_essentials()
 
-def create_fba_model(solver = GLPK):
+def create_fba_model(solver = DEFAULT_SOLVER):
 
     # g6p_c = cobra.Metabolite(id = 'g6p_c', formula = 'C6H13O9P')
     # f6p_c = cobra.Metabolite(id = 'f6p_c', formula = 'C6H13O9P')
@@ -125,6 +125,7 @@ def create_etfl_model(has_thermo, has_neidhardt,
                       n_mu_bins = 64,
                       mu_max = 3,
                       optimize = True,
+                      solver=DEFAULT_SOLVER,
                       ):
     #------------------------------------------------------------
     # Initialisation

From fb67c9bc2c55a6b32d12f8a3cc803f43c18ef87e Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 25 Oct 2019 09:30:26 +0200
Subject: [PATCH 09/19] ENH: chebyshev 3 separate centers + slacks export +
 folder export

---
 work/plasmids/analysis.py                    | 139 +++++++++++++++----
 work/plasmids/debug.yaml                     |  21 ++-
 work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml |  19 ++-
 work/plasmids/main.py                        |  13 +-
 4 files changed, 152 insertions(+), 40 deletions(-)

diff --git a/work/plasmids/analysis.py b/work/plasmids/analysis.py
index 8861c5b..c3e3630 100644
--- a/work/plasmids/analysis.py
+++ b/work/plasmids/analysis.py
@@ -1,5 +1,5 @@
 from etfl.optim.utils import check_solution
-from etfl.optim.variables import mRNAVariable, EnzymeVariable
+from etfl.optim.variables import mRNAVariable, EnzymeVariable, DNAVariable, RibosomeUsage, RNAPUsage
 from etfl.analysis.utils import enzymes_to_peptides_conc
 from etfl.analysis.summary import print_standard_sol
 
@@ -9,33 +9,75 @@
 
 from collections import OrderedDict
 
+def mkdir(directory):
+    import os
+    full_dir = 'outputs/' + directory
+    if not os.path.exists(full_dir):
+        os.makedirs(full_dir)
 
 def chebyshev(model, chebyshev_include, inplace=True, big_m=1000):
-    from etfl.optim.variables import mRNAVariable,EnzymeVariable
+    from etfl.optim.variables import mRNAVariable,EnzymeVariable, RibosomeUsage, RNAPUsage
     from etfl.optim.constraints import ForwardCatalyticConstraint,BackwardCatalyticConstraint
-    from etfl.analysis.dynamic import chebyshev_center, compute_center
-
-    chebyshev_variables = model.get_variables_of_type(mRNAVariable)
-    chebyshev_variables += model.get_variables_of_type(EnzymeVariable)
-
-    initial_solution = model.optimize()
-
-    the_obj = model.objective
-
-    chebyshev_center(model, chebyshev_variables,
-                 inplace=inplace,
-                 big_m=big_m,
-                 include=chebyshev_include)
-
-    # Revert to the previous objective, as chebyshev_center sets it to maximize
-    # the radius
-    model.objective = the_obj
+    from pytfa.analysis.chebyshev import chebyshev_transform, \
+        get_cons_var_classes, get_variables
+    from etfl.analysis.dynamic import compute_center
+
+    if not inplace:
+        new = model.copy()
+    else:
+        new = model
+
+    new.objective = new.growth_reaction
+    initial_solution = new.optimize()
+    print('Initial solution:')
+    print_standard_sol(new,solution=initial_solution)
+
+    # List of radii to add
+    radii = list()
+
+    the_obj = new.objective
+
+    for this_cons_type in chebyshev_include:
+        if this_cons_type == 'CatalyticConstraint':
+            # chebyshev_variables = new.get_variables_of_type(EnzymeVariable)
+            # vars = get_variables(new, chebyshev_variables)
+            include_list = get_cons_var_classes(new, ['ForwardCatalyticConstraint', 'BackwardCatalyticConstraint'],
+                                                type = 'cons')
+            this_r_id = 'catalytic'
+        elif this_cons_type =='ExpressionCoupling':
+            # chebyshev_variables = new.get_variables_of_type(mRNAVariable)
+            # chebyshev_variables = new.get_variables_of_type(RibosomeUsage)
+            # vars = get_variables(new, chebyshev_variables)
+            include_list = get_cons_var_classes(new, [this_cons_type], type='cons')
+            this_r_id = 'translation'
+        elif this_cons_type =='RNAPAllocation':
+            # chebyshev_variables = new.get_variables_of_type(DNAVariable)
+            # chebyshev_variables = new.get_variables_of_type(RNAPUsage)
+            # vars = get_variables(new, chebyshev_variables)
+            include_list = get_cons_var_classes(new, [this_cons_type], type='cons')
+            this_r_id = 'transcription'
+        else:
+            include_list = get_cons_var_classes(new, [this_cons_type], type='cons')
+            this_r_id = this_cons_type
+
+
+        r = chebyshev_transform(model=new,
+                                vars=list(),
+                                # vars=vars,
+                                include_list=include_list,
+                                exclude_list=list(),
+                                radius_id=this_r_id)
+        radii.append(r)
+
+    regularisation =  0.001*new.enzymes.dummy_enzyme.scaled_concentration
+    cheby_obj = sum(radii) + regularisation
 
     # We want to compute the center under the constraint of the growth at the
     # initial solution.
-    chebyshev_sol = compute_center(model, provided_solution=initial_solution,
+    chebyshev_sol = compute_center(new, provided_solution=initial_solution,
+                                   objective=cheby_obj,
                                    revert_changes=False)
-    print_standard_sol(model)
+    print_standard_sol(new, solution=chebyshev_sol)
     return chebyshev_sol
 
 
@@ -48,7 +90,8 @@ def export_mrna(model, tag, solution=None):
     mrna_data = data[mrna_var_ids]
     mrna_data.name = tag
 
-    filename = 'outputs/' + tag + '_mrna' + '.csv'
+    mkdir(tag)
+    filename = 'outputs/' + tag + '/mrna' + '.csv'
 
     mrna_data.to_csv(filename, header=True)
     return mrna_data
@@ -65,7 +108,8 @@ def export_peptides(model, tag, solution=None):
     pep_data = enzymes_to_peptides_conc(model=model, enzyme_conc=enz_data)
     pep_data = pd.DataFrame.from_dict(pep_data, orient='index')
 
-    filename = 'outputs/' + tag + '_pep' + '.csv'
+    mkdir(tag)
+    filename = 'outputs/' + tag + '/pep' + '.csv'
 
     pep_data.to_csv(filename, header=True)
     return pep_data
@@ -74,7 +118,8 @@ def export_peptides(model, tag, solution=None):
 def export_fluxes(model, tag, solution=None):
     solution = check_solution(model, solution)
 
-    filename = 'outputs/' + tag + '_rxn' + '.csv'
+    mkdir(tag)
+    filename = 'outputs/' + tag + '/rxn' + '.csv'
 
     export_reactions_for_escher(model, solution.raw, filename)
     return solution.fluxes
@@ -85,7 +130,8 @@ def binding_constraints(model, tag, epsilon, solution=None):
     # solution = check_solution(model, solution)
     model.optimize()
 
-    filename = 'outputs/' + tag + '_binding_cons' + '.csv'
+    mkdir(tag)
+    filename = 'outputs/' + tag + '/binding_cons' + '.csv'
 
     binding_constraints = get_binding_constraints(model, epsilon)
 
@@ -96,7 +142,48 @@ def binding_constraints(model, tag, epsilon, solution=None):
             name2bind[the_name] = cons_kind
 
     df = pd.DataFrame.from_dict(name2bind, orient='index')
-    df.columns = ['name','type']
+    df.columns = ['type']
+    df.index.name = 'name'
+
+    df.to_csv(filename, header=True)
+    return df
+
+
+def export_slacks(model, tag, constraint_types, solution=None):
+    from pytfa.analysis.chebyshev import get_cons_var_classes, ChebyshevRadius
+
+    solution = check_solution(model, solution)
+    include_list = get_cons_var_classes(model, constraint_types,
+                                type = 'cons')
+    cons_dict = {}
+
+    if model.problem.__name__ == 'optlang.gurobi_interface':
+        for cons_class in include_list:
+            these_cons = model.get_constraints_of_type(cons_class)
+            cons_dict.update({c.name:(c.constraint._internal_constraint.Slack,
+                                      get_radius(c.expr),
+                                      c.__class__.__name__)
+                               for c in these_cons})
+
+    mkdir(tag)
+    filename = 'outputs/' + tag + '/slacks' + '.csv'
+
+    df = pd.DataFrame.from_dict(cons_dict, orient='index')
+    df.columns = ['slack','radius','type']
+    df.index.name = 'name'
 
     df.to_csv(filename, header=True)
     return df
+
+def get_radius(expr):
+    """
+    Returns the value of the sum of Chebyshev radii in a constraint expression
+
+    :param cons:
+    :return:
+    """
+    from pytfa.analysis.chebyshev import ChebyshevRadius
+    variables = expr.free_symbols
+    total = sum([v.primal for v in variables
+                 if v.name.startswith(ChebyshevRadius.prefix)])
+    return total
\ No newline at end of file
diff --git a/work/plasmids/debug.yaml b/work/plasmids/debug.yaml
index 90cbfca..ce6f2aa 100644
--- a/work/plasmids/debug.yaml
+++ b/work/plasmids/debug.yaml
@@ -13,14 +13,21 @@ analysis:
   export_fluxes: yes
   binding_constraints:
     epsilon: 1.0e-8
+  export_slacks:
+    constraint_types:
+      - ForwardCatalyticConstraint
+      - BackwardCatalyticConstraint
+      - SynthesisConstraint
+      - RNAPAllocation
+      - ExpressionCoupling
   # Perform Chebyshev centering ?
-  # chebyshev:
-  #   chebyshev_include:
-  #     - ForwardCatalyticConstraint
-  #     - BackwardCatalyticConstraint
-  #     - RNAPAllocation
-  #     - ExpressionCoupling
-  #   inplace: false
+  chebyshev:
+    chebyshev_include:
+      - CatalyticConstraint
+      - RNAPAllocation
+      - ExpressionCoupling
+    inplace: true
 options:
   verbose: false
   inplace: true
+  solver: gurobi
diff --git a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
index aab6683..8890f1a 100644
--- a/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
+++ b/work/plasmids/iJO1366_pET-AR-ALS_1_copy.yaml
@@ -1,5 +1,5 @@
-model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191007_142013.json'
-tag: 'vEFL_BDO_1c_RNAP'
+model: '../../tutorials/models/iJO1366_vEFL_v_0.10_431_enz_128_bins__20191014_124839.json'
+tag: 'vEFL_BDO_1c_RNAP_cheby_sum_reg'
 simulation:
   copy_number: 1
   vector: 'plasmid_pET-AR-ALS'
@@ -11,14 +11,23 @@ analysis:
   export_peptides: yes
   export_mrna: yes
   export_fluxes: yes
+  binding_constraints:
+    epsilon: 1.0e-8
+  export_slacks:
+    constraint_types:
+      - ForwardCatalyticConstraint     
+      - BackwardCatalyticConstraint    
+      - SynthesisConstraint     
+      - RNAPAllocation     
+      - ExpressionCoupling 
   # Perform Chebyshev centering ?
   chebyshev:
     chebyshev_include:
-      - ForwardCatalyticConstraint
-      - BackwardCatalyticConstraint
+      - CatalyticConstraint
       - RNAPAllocation
       - ExpressionCoupling
-    inplace: false
+    inplace: true
 options:
   verbose: false
   inplace: true
+  solver: gurobi
diff --git a/work/plasmids/main.py b/work/plasmids/main.py
index c0196e6..856f860 100644
--- a/work/plasmids/main.py
+++ b/work/plasmids/main.py
@@ -30,14 +30,22 @@
 vec_dict={'plasmid_pET-AR-ALS':get_bdo_plasmid,
           'debug':get_debug_plasmid}
 
+def write_conf(model, conf):
+    filename = 'outputs/' + conf['tag'] + '/config.yaml'
+    conf['objective'] = model.objective.expression
+    with open(filename,'w') as fid:
+        yaml.dump(config, fid)
+
+
+
 if __name__ == '__main__':
     config = read_config(CONFIG)
 
     if config['model'] != 'debug':
-        model = load_json_model(config['model'])
+        model = load_json_model(config['model'],solver=config['options']['solver'])
     else:
         from etfl.tests.small_model import create_etfl_model
-        model = create_etfl_model(0,1)
+        model = create_etfl_model(0,1, solver=config['options']['solver'])
 
     standard_solver_config(model)
     model.solver.configuration.verbosity = config['options']['verbose']
@@ -76,3 +84,4 @@
             outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments)
 
         # Check antibiotic resistance is expressed ?
+    write_conf(transmodel, config)

From 860e01027dc9ee4700425d4dd7dd02bcf91f8ae7 Mon Sep 17 00:00:00 2001
From: Omid <mohammad.oftadeh@epfl.ch>
Date: Wed, 30 Oct 2019 10:16:36 +0100
Subject: [PATCH 10/19] FIX: fix sequence type on reloading a model.

---
 etfl/io/dict.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/etfl/io/dict.py b/etfl/io/dict.py
index 176bf17..09ef703 100644
--- a/etfl/io/dict.py
+++ b/etfl/io/dict.py
@@ -12,6 +12,9 @@
 
 from tqdm import tqdm
 
+from Bio.Seq import Seq
+from Bio.Alphabet import DNAAlphabet, RNAAlphabet, ProteinAlphabet
+
 import cobra.io.dict as cbd
 from cobra.exceptions import SolverNotFound
 from optlang.util import expr_to_json, parse_expr
@@ -892,8 +895,8 @@ def find_genes_from_dict(new, obj):
             g = replace_by_me_gene(new, gene_dict['id'], str(sequence))
             if key == 'expressed_genes':
                 # Newer models
-                g._rna            = gene_dict['rna']
-                g._peptide        = gene_dict['peptide']
+                g._rna            = Seq(gene_dict['rna'], alphabet=RNAAlphabet())
+                g._peptide        = Seq(gene_dict['peptide'], alphabet=ProteinAlphabet())
                 g._copy_number    = gene_dict['copy_number']
                 g._transcribed_by = [new.enzymes.get_by_id(e)
                                      for e in gene_dict['transcribed_by']] \

From a8fb5bf8dc36b2337d585cac72f566f2a310faf9 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:05:57 +0100
Subject: [PATCH 11/19] ENH: changed the hack to create a transmodel from an
 ETFL model to make it more robust

---
 etfl/core/vector.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/etfl/core/vector.py b/etfl/core/vector.py
index 0fc13eb..8a9be52 100644
--- a/etfl/core/vector.py
+++ b/etfl/core/vector.py
@@ -86,26 +86,35 @@ def make_plasmid(gene_list):
 class TransModel(MEModel):
 
     def __init__(self, me_model, inplace = True):
+        """
+        Hack to subclass instantiated object:
+        https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass
+
+        :param me_model:
+        :param inplace:
+        """
         if not inplace:
             new = me_model.copy()
+            self.__dict__ = new.__dict__
         else:
-            new = me_model
+            # new = me_model
+            self.__dict__ = me_model.__dict__
 
-        self._me_model = new
+        # self._me_model = new
         self._has_transcription_changed = False
         self._has_translation_changed = False
         self.vectors = dict()
         self.vector_copy_number = dict()
 
-    def __getattr__(self,attr):
-        """
-        Hack to subclass instantiated object:
-        https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass
-
-        :param attr:
-        :return:
-        """
-        return getattr(self._me_model,attr)
+    # def __getattr__(self,attr):
+    #     """
+    #     Hack to subclass instantiated object:
+    #     https://stackoverflow.com/questions/33463232/subclassing-in-python-of-instantiated-superclass
+    #
+    #     :param attr:
+    #     :return:
+    #     """
+    #     return getattr(self._me_model,attr)
 
     def add_vector(self, vector,
                    copy_number=1):

From 505cc0f6c00923aeaf1e6647c4b8ba027b23d77d Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:07:03 +0100
Subject: [PATCH 12/19] ENH: global improvmements in all parts of vector.py

---
 etfl/core/vector.py | 62 ++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/etfl/core/vector.py b/etfl/core/vector.py
index 8a9be52..b000a37 100644
--- a/etfl/core/vector.py
+++ b/etfl/core/vector.py
@@ -15,7 +15,7 @@
     DNA_WEIGHT_CONS_ID, MRNA_WEIGHT_VAR_ID, PROT_WEIGHT_VAR_ID, \
     DNA_WEIGHT_VAR_ID, DNA_FORMATION_RXN_ID,\
     define_prot_weight_constraint, define_mrna_weight_constraint, \
-    define_dna_weight_constraint
+    define_dna_weight_constraint, get_dna_synthesis_mets
 from ..optim.constraints import tRNAMassBalance, InterpolationConstraint, \
     TotalCapacity, EnzymeRatio
 from ..optim.variables import InterpolationVariable
@@ -130,17 +130,20 @@ def add_vector(self, vector,
         if vector.ribosome is not None:
             self.add_vector_ribosome(vector.ribosome)
 
-        # Edit the gene copy number by the number of plasmids.
-        for g in vector.genes:
-            g.copy_number *= copy_number
-
         self.add_genes(vector.genes)
+
         # This adds the peptides as well:
         self.add_nucleotide_sequences({g.id:g.sequence for g in vector.genes})
+        # Edit the gene copy number by the number of plasmids.
+        # TODO: make this more robust / Edit add_nt_seq to include gene copy #
+        for g in vector.genes:
+            self.genes.get_by_id(g.id).copy_number = g.copy_number * copy_number
+
         self.express_genes(vector.genes)
 
         # Add the mRNAs
         self.add_mrnas(vector.mrna_dict.values())
+        self._push_queue()
 
         #
 
@@ -167,13 +170,12 @@ def add_vector(self, vector,
         # recompute mRNA-dependent constraints
         self.recompute_trna_balances()
 
-        if self._has_transcription_changed:
-            self.recompute_transcription()
-        if self._has_translation_changed:
-            self.recompute_translation()
+        self.recompute_transcription()
+        self.recompute_translation()
 
         # This needs to account for new DNA
         self.recompute_allocation()
+        self._push_queue()
 
     def recompute_trna_balances(self):
 
@@ -212,10 +214,6 @@ def add_vector_RNAP(self, rnap):
         # This adds the RNAP as an enzyme, and also enforces its free ratio
         self.add_rnap(rnap, free_ratio=free_rnap_ratio)
 
-        self._has_transcription_changed = True
-
-        pass
-
     def add_vector_ribosome(self, ribosome):
         """
         Adds the vector's ribosome to the ribosome pool of the cell
@@ -225,25 +223,19 @@ def add_vector_ribosome(self, ribosome):
         """
         self.add_ribosome(ribosome, free_ratio=free_rib_ratio)
 
-        self._has_translation_changed = True
-
-        pass
-
     def recompute_translation(self):
         """
 
         :return:
         """
 
-        #1.1 Remove translation catalytic constraints
+        for rib_id in self.ribosome:
+            #1.1 Find the RNAP capacity constraint
+            cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rib_id)
 
-        #1.2 Apply new translation catalytic constraints
-
-        #2.1 Remove former ribosome balance
-
-        #2.2 Apply new ribosome balance
-
-        pass
+            #1.2 Edit the RNAP capacity constraint
+            new_total_capacity = self._get_rib_total_capacity()
+            cons.change_expr(new_total_capacity)
 
     def recompute_transcription(self):
         """
@@ -251,12 +243,15 @@ def recompute_transcription(self):
         :return:
         """
 
-        #1.1 Find the RNAP capacity constraint
-        cons = self.get_constraints_of_type(TotalCapacity).get_by_id('rnap')
+        # TODO: No need to recompute for the one we just added.
+        # Keep tabs ? use an except list
+        for rnap_id in self.rnap:
+            # 1.1 Find the RNAP capacity constraint
+            cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rnap_id)
 
-        #1.2 Edit the RNAP capacity constraint
-        new_total_capacity = self._get_rnap_total_capacity()
-        cons.change_expr(new_total_capacity)
+            # 1.2 Edit the RNAP capacity constraint
+            new_total_capacity = self._get_rnap_total_capacity()
+            cons.change_expr(new_total_capacity)
 
     def recompute_allocation(self):
         """
@@ -336,10 +331,9 @@ def recalculate_dna(self, dna_ggdw):
         new_gc = (wt_chromosome_len * wt_gc + vector_dna_len * vector_gc) / new_chromosome_len
 
         #3. Update ATGC stoichiometries
-        dna_formation_reaction.add_metabolites({
-            v: -1 * new_chromosome_len * (new_gc if k.lower() in 'gc' else 1 - new_gc)
-            for k, v in self.dna_nucleotides.items()
-        }, combine = False)
+        ppi = self.essentials['ppi']
+        mets = get_dna_synthesis_mets(self, new_chromosome_len, new_gc, ppi)
+        dna_formation_reaction.add_metabolites(mets, combine = False)
 
         ##. Redefine the DNA weight constraint
         dna.len = new_chromosome_len

From 4420e873a4d5adf11f78a1c03db685539203742c Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:09:22 +0100
Subject: [PATCH 13/19] REF: cleand up getting stoichiometries and constraints
 fro DNA and Ribosome

---
 etfl/core/allocation.py | 17 +++++++++++------
 etfl/core/memodel.py    | 35 ++++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/etfl/core/allocation.py b/etfl/core/allocation.py
index ed82e79..e7e0bbd 100644
--- a/etfl/core/allocation.py
+++ b/etfl/core/allocation.py
@@ -399,12 +399,7 @@ def add_dna_mass_requirement(model, mu_values, dna_rel, gc_ratio,
                                  scaled=True)
     model.add_reactions([dna_formation])
 
-    # In this formulation, we make 1 unit of the whole chromosome with NTPs
-    g = gc_ratio
-    mets = {v: -1 * chromosome_len * (g if k.lower() in 'gc' else 1 - g)
-            for k, v in model.dna_nucleotides.items()}
-    # Don't forget to release ppi (2 ppi per bp)
-    mets[ppi] = 2 * chromosome_len
+    mets = get_dna_synthesis_mets(model, chromosome_len, gc_ratio, ppi)
 
     dna_formation.add_metabolites(mets)
 
@@ -443,6 +438,16 @@ def add_dna_mass_requirement(model, mu_values, dna_rel, gc_ratio,
     model.regenerate_constraints()
 
 
+def get_dna_synthesis_mets(model, chromosome_len, gc_ratio, ppi):
+    # In this formulation, we make 1 unit of the whole chromosome with NTPs
+    g = gc_ratio
+    mets = {v: -1 * chromosome_len * (g if k.lower() in 'gc' else 1 - g)
+            for k, v in model.dna_nucleotides.items()}
+    # Don't forget to release ppi (2 ppi per bp)
+    mets[ppi] = 2 * chromosome_len
+    return mets
+
+
 def define_dna_weight_constraint(model, dna, dna_ggdw, gc_content, chromosome_len):
     # DNA mass (BioPython has g.mol^-1, while we are in mmol)
     ma = molecular_weight('A', seq_type='DNA') / 1000  # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py
index e752557..2d69086 100644
--- a/etfl/core/memodel.py
+++ b/etfl/core/memodel.py
@@ -1616,23 +1616,7 @@ def _populate_ribosomes(self):
         # 3 -> Add ribosomal capacity constraint
         self.regenerate_variables()
 
-        free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(x)
-                     for x in self.ribosome]
-        # CATCH : This is summing ~1500+ variable objects, and for a reason
-        # sympy does not like it. Let's cut it in smaller chunks and sum
-        # afterwards
-        # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage))
-        all_ribosome_usage = self.get_variables_of_type(RibosomeUsage)
-
-        # sum_RPs = chunk_sum(all_ribosome_usage)
-        sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage])
-
-        # The total RNAP capacity constraint looks like
-        # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj)
-        ribo_usage = sum_RPs \
-                + sum([x.unscaled for x in free_ribosome]) \
-                - sum([x.concentration for x in self.ribosome.values()])
-        ribo_usage /= min([x.scaling_factor for x in self.ribosome.values()])
+        ribo_usage = self._get_rib_total_capacity()
         # For nondimensionalization
         # ribo_usage = (sum_RPs + self.Rf.unscaled - self.ribosome.concentration) \
         #              / self.ribosome.scaling_factor
@@ -1650,6 +1634,23 @@ def _populate_ribosomes(self):
         self.regenerate_constraints()
         self.regenerate_variables()
 
+    def _get_rib_total_capacity(self):
+        free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(x)
+                         for x in self.ribosome]
+        # CATCH : This is summing ~1500+ variable objects, and for a reason
+        # sympy does not like it. Let's cut it in smaller chunks and sum
+        # afterwards
+        # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage))
+        all_ribosome_usage = self.get_variables_of_type(RibosomeUsage)
+        # sum_RPs = chunk_sum(all_ribosome_usage)
+        sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage])
+        # The total RNAP capacity constraint looks like
+        # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj)
+        ribo_usage = sum_RPs \
+                     + sum([x.unscaled for x in free_ribosome]) \
+                     - sum([x.concentration for x in self.ribosome.values()])
+        ribo_usage /= min([x.scaling_factor for x in self.ribosome.values()])
+        return ribo_usage
 
     def apply_ribosomal_catalytic_constraint(self, reaction):
         """

From c287b898a8f258cc22779b710eba8e3b21e4585d Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:09:49 +0100
Subject: [PATCH 14/19] FIX: more robust handling of genes

---
 etfl/core/genes.py         | 6 +++---
 etfl/core/macromolecule.py | 8 ++++++--
 etfl/core/memodel.py       | 6 +++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/etfl/core/genes.py b/etfl/core/genes.py
index 29e5584..c0c1267 100644
--- a/etfl/core/genes.py
+++ b/etfl/core/genes.py
@@ -47,7 +47,7 @@ def __init__(self, id, name, sequence, copy_number=1,
         self._rna = ''
         self._peptide = ''
 
-        self._copy_number = copy_number
+        self._copy_number = int(copy_number)
         self._transcribed_by = transcribed_by
         self._translated_by = translated_by
 
@@ -61,10 +61,10 @@ def copy_number(self, value):
         if value != self._copy_number:
             if self.model is None:
                 # Easy
-                self._copy_number = value
+                self._copy_number = int(value)
             else:
                 # We need to make the model change the RNAP allocation
-                self._copy_number = value
+                self._copy_number = int(value)
                 self.model.edit_gene_copy_number(self.id)
         else:
             # Nothing to do here :)
diff --git a/etfl/core/macromolecule.py b/etfl/core/macromolecule.py
index 894c254..ade46fc 100644
--- a/etfl/core/macromolecule.py
+++ b/etfl/core/macromolecule.py
@@ -16,11 +16,12 @@
 
 
 class Macromolecule(Species, ABC):
-    def __init__(self, id=None, kdeg=0, *args, **kwargs):
+    def __init__(self, id=None, kdeg=0, scaling_factor = None, *args, **kwargs):
         Species.__init__(self, id = id, *args, **kwargs)
 
         self.kdeg = kdeg
         self.degradation = None
+        self._scaling_factor = scaling_factor
 
     @abstractmethod
     def init_variable(self, queue=False):
@@ -95,4 +96,7 @@ def molecular_weight(self):
 
     @property
     def scaling_factor(self):
-        return 1/self.molecular_weight
\ No newline at end of file
+        # Calculate it only once
+        if self._scaling_factor is None:
+            self._scaling_factor = 1/self.molecular_weight
+        return self._scaling_factor
\ No newline at end of file
diff --git a/etfl/core/memodel.py b/etfl/core/memodel.py
index 2d69086..a5fce7d 100644
--- a/etfl/core/memodel.py
+++ b/etfl/core/memodel.py
@@ -434,7 +434,7 @@ def express_genes(self, gene_list):
             if isinstance(gene, str):
                 the_gene = self.genes.get_by_id(gene)
             else:
-                the_gene = gene
+                the_gene = self.genes.get_by_id(gene.id)
 
             if not isinstance(the_gene, ExpressedGene):
                 continue
@@ -1309,10 +1309,10 @@ def edit_gene_copy_number(self, gene_id):
 
         rnap_alloc = self.get_constraints_of_type(RNAPAllocation)
         rnap_usage = self.get_variables_of_type(RNAPUsage)
-        if self.id in rnap_alloc and self.id in rnap_usage:
+        if gene_id in rnap_alloc and gene_id in rnap_usage:
             # We need to edit the variable
             # Modify the polymerase constraint
-            cons = rnap_alloc.get_by_id(self.id)
+            cons = rnap_alloc.get_by_id(gene_id)
             new_expr = self._get_rnap_allocation_expression(the_gene)
             cons.change_expr(new_expr)
             self.logger.debug('Changed RNAP allocation for gene {}'.format(gene_id))

From ad63220d5efe58e916547435e2c6fed033497d71 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:11:48 +0100
Subject: [PATCH 15/19] ENH: Adding RNAP binding equilibrium constraints

---
 etfl/core/equilibrium.py | 121 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 etfl/core/equilibrium.py

diff --git a/etfl/core/equilibrium.py b/etfl/core/equilibrium.py
new file mode 100644
index 0000000..339b275
--- /dev/null
+++ b/etfl/core/equilibrium.py
@@ -0,0 +1,121 @@
+from .genes import ExpressedGene
+from ..optim.variables import RNAPUsage, FreeEnzyme, BinaryActivator, \
+    LinearizationVariable
+from ..optim.constraints import GeneConstraint, EnzymeRatio,\
+    LinearizationConstraint
+from pytfa.optim.reformulation import petersen_linearization
+
+
+class RNAPBindingEquilibrium(GeneConstraint):
+    prefix = 'PBEq_'
+
+
+def linearize_product(model, b, x, queue=False):
+    """
+
+    :param model:
+    :param b: the binary variable
+    :param x: the continuous variable
+    :param queue: whether to queue the variables and constraints made
+    :return:
+    """
+
+    # Linearization step for ga_i * [E]
+    z_name = '__MUL__'.join([b.name, x.name])
+    # Add the variables
+    model_z_u = model.add_variable(kind=LinearizationVariable,
+                                  hook=model,
+                                  id_=z_name,
+                                  lb=0,
+                                  ub=x.ub,
+                                  queue=False)
+
+    big_m = x.ub*10
+
+    z_u, new_constraints = petersen_linearization(b=b, x=x, M=big_m,
+                                                  z=model_z_u)
+
+    # Add the constraints:
+    for cons in new_constraints:
+        model.add_constraint(kind=LinearizationConstraint,
+                             hook=model,
+                             id_=cons.name,
+                             expr=cons.expression,
+                             # expr=new_expression,
+                             ub=cons.ub,
+                             lb=cons.lb,
+                             queue=True)
+
+    model._push_queue()
+    return model_z_u
+
+def add_rnap_binding_equilibrium_constraints(model, the_rnap, Kb):
+    """
+
+    :param model:
+    :param the_rnap:
+    :param Kb:
+    :return:
+    """
+
+    # #1. Find if there is a ratio constraint on the RNAP - this is incompatible
+    # all_ratio_cons = model.get_constraints_of_type(EnzymeRatio)
+    #
+    # for rnap_id in model.rnap:
+    #     try:
+    #         model.remove_constraint(all_ratio_cons.get_by_id(rnap_id))
+    #     except KeyError:
+    #         pass
+
+    #2. Create the linearized bilinear product delta_u*RNAP_F
+
+    RNAP_F = model.get_variables_of_type(FreeEnzyme).get_by_id(the_rnap.id).variable
+    activation_vars = model.get_variables_of_type(BinaryActivator)
+
+    zs = {delta_u.name: linearize_product(model, delta_u, RNAP_F, queue=True)
+          for delta_u in activation_vars}
+
+    # TODO: cleanup
+    dna_cons = model.interpolation_constraint.DNA_interpolation.constraint
+    dna_hat = {k.name:v for k,v in
+               dna_cons.get_linear_coefficients(
+                   activation_vars.list_attr('variable')).items()}
+
+    free_rnap_times_gene_conc = sum(zs[delta_u]*dna_hat[delta_u] for delta_u in zs)
+
+    sorted_dna_conc = sorted(dna_hat.values())
+    dna_hat_resolution = max([x-y for x,y in zip(sorted_dna_conc[1:], sorted_dna_conc[:-1])])
+
+
+    all_rnap_usage = model.get_variables_of_type(RNAPUsage)
+
+    #3. For each gene:
+
+    for the_gene in model.genes:
+        if not isinstance(the_gene, ExpressedGene):
+            continue
+
+        copy_number = the_gene.copy_number
+
+        #3.1. get rnap usage
+
+        RNAP_l = all_rnap_usage.get_by_id(the_gene.id)
+
+        #3.2. create constraint
+
+        expr = Kb * RNAP_l - copy_number * free_rnap_times_gene_conc
+
+        # scaling
+        # expr *= model.dna.scaling_factor
+
+        #3.3 Add it to the model
+
+        model.add_constraint(kind=RNAPBindingEquilibrium,
+                             hook=the_gene,
+                             expr=expr,
+                             ub= copy_number*dna_hat_resolution/2,
+                             lb=-copy_number*dna_hat_resolution/2,
+                             queue=True)
+    model._push_queue()
+    model.regenerate_constraints()
+    model.regenerate_variables()

From 3d69706a9d5566fcc5842a37edd3970149b7da11 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:13:17 +0100
Subject: [PATCH 16/19] FIX: DNA formation export not properly handled

---
 etfl/io/dict.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/etfl/io/dict.py b/etfl/io/dict.py
index 176bf17..1da4834 100644
--- a/etfl/io/dict.py
+++ b/etfl/io/dict.py
@@ -29,6 +29,7 @@
     EnzymaticReaction, ProteinComplexation, DegradationReaction, \
     ExpressionReaction, DNAFormation
 from ..core.thermomemodel import ThermoMEModel
+from ..core.allocation import DNA_FORMATION_RXN_ID
 from ..optim.utils import rebuild_constraint, rebuild_variable
 from ..optim.variables import tRNAVariable, GrowthRate, FreeEnzyme
 from ..utils.utils import replace_by_reaction_subclass, replace_by_me_gene
@@ -406,12 +407,13 @@ def _add_me_reaction_info(rxn, rxn_dict):
     elif isinstance(rxn, EnzymaticReaction):
         rxn_dict['kind'] = 'EnzymaticReaction'
         rxn_dict['enzymes'] = [x.id for x in rxn.enzymes]
-    elif isinstance(rxn, DNAFormation):
-        rxn_dict['kind'] = 'DNAFormation'
     # Generic Reaction
     else:
         rxn_dict['kind'] = 'Reaction'
 
+    if isinstance(rxn, DNAFormation):
+        rxn_dict['kind'] = 'DNAFormation'
+
 def _add_thermo_reaction_info(rxn, rxn_dict):
     if hasattr(rxn, 'thermo'):
         rxn_dict['thermo'] = rxn.thermo
@@ -823,7 +825,9 @@ def find_degradation_reactions_from_dict(new, obj):
 def find_dna_formation_reaction_from_dict(new, obj):
     new_rxns = list()
     for rxn_dict in obj['reactions']:
-        if rxn_dict['kind'] == 'DNAFormation':
+        # TODO: CLEANUP
+        if rxn_dict['id'] == DNA_FORMATION_RXN_ID:
+        # if rxn_dict['kind'] == 'DNAFormation':
             dna = new.dna
             new_rxn = replace_by_reaction_subclass(new,
                                                    kind = DNAFormation,
@@ -894,7 +898,7 @@ def find_genes_from_dict(new, obj):
                 # Newer models
                 g._rna            = gene_dict['rna']
                 g._peptide        = gene_dict['peptide']
-                g._copy_number    = gene_dict['copy_number']
+                g._copy_number    = int(gene_dict['copy_number'])
                 g._transcribed_by = [new.enzymes.get_by_id(e)
                                      for e in gene_dict['transcribed_by']] \
                                      if gene_dict['transcribed_by'] else None

From 035d206f0df24bbe71183a2d7f55b7c294bd3474 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:15:09 +0100
Subject: [PATCH 17/19] ENH: DNA Polymerase

---
 etfl/data/ecoli.py             | 48 ++++++++++++++++++++++++++++++++++
 tutorials/helper_gen_models.py |  9 ++++---
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/etfl/data/ecoli.py b/etfl/data/ecoli.py
index 8d8de4d..9ceaefb 100644
--- a/etfl/data/ecoli.py
+++ b/etfl/data/ecoli.py
@@ -785,6 +785,54 @@ def get_atp_synthase_coupling(atps_name):
 
     return {atps_name:[atp_synthase]}
 
+def get_dna_polymerase(dna_pol_name='DNAPol3'):
+    """
+    https://en.wikipedia.org/wiki/DNA_polymerase_III_holoenzyme
+
+    The replisome is composed of the following:
+
+    2 DNA Pol III enzymes, each comprising α, ε and θ subunits. (It has been proven that there is a third copy of Pol III at the replisome.[1])
+        the α subunit (encoded by the dnaE gene) has the polymerase activity.
+        the ε subunit (dnaQ) has 3'→5' exonuclease activity.
+        the θ subunit (holE) stimulates the ε subunit's proofreading.
+    2 β units (dnaN) which act as sliding DNA clamps, they keep the polymerase bound to the DNA.
+    2 τ units (dnaX) which act to dimerize two of the core enzymes (α, ε, and θ subunits).
+    1 γ unit (also dnaX) which acts as a clamp loader for the lagging strand Okazaki fragments, helping the two β subunits to form a unit and bind to DNA. The γ unit is made up of 5 γ subunits which include 3 γ subunits, 1 δ subunit (holA), and 1 δ' subunit (holB). The δ is involved in copying of the lagging strand.
+    Χ (holC) and Ψ (holD) which form a 1:1 complex and bind to γ or τ. X can also mediate the switch from RNA primer to DNA.[2]
+    :return:
+    """
+
+    # DNA POLYMERASE III HOLOENZYME: Structure and Function of a Chromosomal Replicating Machine
+    # Annual Review of Biochemistry
+    # Vol. 64:171-200 (Volume publication date July 1995)
+    # https://doi.org/10.1146/annurev.bi.64.070195.001131
+
+    ktrans = 1000 * 3600
+
+    composition = {
+        # 2 DNA Pol III Enzymes
+        'b0184':2, #dnaE
+        'b0215':2, #dnaQ
+        'b1842':2, #holE
+        # Beta sliding clamp units and Tau scaffold
+        'b3701': 2*2,  # dnaN, Beta clamps
+        'b0470': 3,  # dnaX, 2 tau units and 1 gamma unit
+        'b0640': 1,  # holA, Delta subunit
+        'b1099': 1,  # holB, Delta prime subunit
+        'b4259': 1,  # holC, Xi subunit
+        'b4372': 1,  # holD, Psi subunit
+
+    }
+
+    dna_polymerase = Enzyme(dna_pol_name,
+                        name='DNA Polymerase 3',
+                        kcat_fwd=ktrans,
+                        kcat_bwd=ktrans,
+                        kdeg=kdeg_enz,
+                        composition=composition)
+
+    return dna_polymerase
+
 
 def get_transporters_coupling(model, additional_enz):
 
diff --git a/tutorials/helper_gen_models.py b/tutorials/helper_gen_models.py
index d494dba..3b20691 100644
--- a/tutorials/helper_gen_models.py
+++ b/tutorials/helper_gen_models.py
@@ -26,7 +26,7 @@
                         get_mrna_metrics, get_enz_metrics, \
                         remove_from_biomass_equation, get_ecoli_gen_stats, \
                         get_lloyd_coupling_dict, get_transporters_coupling, \
-                        get_essentials, get_average_kcat
+                        get_essentials, get_average_kcat, get_dna_polymerase
 
 from etfl.optim.config import standard_solver_config
 from etfl.analysis.summary import print_standard_sol
@@ -207,8 +207,8 @@ def create_model(has_thermo, has_expression, has_allocation,
                            rna_nucleotides_mp = rna_nucleotides_mp
                            )
     ecoli.add_mrnas(mrna_dict.values())
-    ecoli.add_ribosome(rib,free_ratio=free_rib_ratio)
-    ecoli.add_rnap(rnap, free_ratio=free_rnap_ratio)
+    ecoli.add_ribosome(rib, free_rib_ratio)
+    ecoli.add_rnap(rnap, free_rnap_ratio)
 
     ecoli.build_expression()
     ecoli.add_enzymatic_coupling(coupling_dict)
@@ -235,6 +235,9 @@ def create_model(has_thermo, has_expression, has_allocation,
                                        chromosome_len=chromosome_len,
                                        dna_dict=dna_nucleotides)
 
+        dna_pol = get_dna_polymerase()
+        ecoli.add_enzymatic_coupling({'DNA_formation':[dna_pol,]})
+
     # Need to put after, because dummy has to be taken into account if used.
     ecoli.populate_expression()
     ecoli.add_trna_mass_balances()

From a0c5bd19197fab37f8316d80fb4e73b9faefb093 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:16:18 +0100
Subject: [PATCH 18/19] ENH: Including RNAP Binding for plasmid study

---
 work/plasmids/main.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/work/plasmids/main.py b/work/plasmids/main.py
index 856f860..cb15c48 100644
--- a/work/plasmids/main.py
+++ b/work/plasmids/main.py
@@ -13,6 +13,7 @@
 from etfl.optim.config import standard_solver_config
 from etfl.analysis.summary import print_standard_sol
 from etfl.core.vector import TransModel
+from etfl.core.equilibrium import add_rnap_binding_equilibrium_constraints
 import yaml
 import sys
 
@@ -32,7 +33,7 @@
 
 def write_conf(model, conf):
     filename = 'outputs/' + conf['tag'] + '/config.yaml'
-    conf['objective'] = model.objective.expression
+    conf['objective'] = str(model.objective.expression)
     with open(filename,'w') as fid:
         yaml.dump(config, fid)
 
@@ -50,7 +51,7 @@ def write_conf(model, conf):
     standard_solver_config(model)
     model.solver.configuration.verbosity = config['options']['verbose']
 
-    copy_number = config['simulation']['copy_number']
+    copy_number = int(config['simulation']['copy_number'])
     vector_generator = vec_dict[config['simulation']['vector']]
     has_rnap = config['simulation']['add_rnap']
 
@@ -61,8 +62,26 @@ def write_conf(model, conf):
     # Model integration #
     #####################
 
+    # 1. Adding RNAP eq constraints
+    # Constant for binding of RNAP to lac promoter
+    # Value 	550 nM
+    # Organism 	Bacteria Escherichia coli
+    # Reference 	Bintu L, Buchler NE, Garcia HG, Gerland U, Hwa T, Kondev J, Phillips R. Transcriptional regulation by the numbers: models. Curr Opin Genet Dev. 2005 Apr15(2):116-24. Fig. 1PubMed ID15797194
+    # Primary Source 	[38] Liu M, Gupte G, Roy S, Bandwar RP, Patel SS, Garges S. Kinetics of transcription initiation at lacP1. Multiple roles of cyclic AMP receptor protein. J Biol Chem. 2003 Oct 10 278(41):39755-61PubMed ID12881519
+    # Method 	Primary source abstract: "[Researchers] examined the kinetics of open complex formation at the lacP1 promoter using tryptophan fluorescence of RNA polymerase and DNA fragments with 2-aminopurine substituted at specific positions."
+    # Comments 	P.118 caption to fig.1c: "In particular, making the simplest assumption that the genomic background for RNAP is given only by the non-specific binding of RNAP with DNA, [investigators] take K[NS]pd=10 000 nM [ref 37], for the lac promoter K[S]pd=550 nM [primary source] and for the T7 promoter, K[S]pd=3nM [BNID 103592]. For the lac promoter, this results in Δɛpd=-2.9kBT and for the T7 promoter, Δɛpd=-8.1kBT."
+    # BNID 	103590
+    #          ecoli density is ~ 1.2
+    #    mol/L * L/kg   * kg/g * g/gDW
+    Kb = 1e-9 * 1/1.2 * 1000 * 1/0.5
+    add_rnap_binding_equilibrium_constraints(model,
+                                             the_rnap=model.rnap['rnap'],
+                                             Kb=Kb)
+
+
     transmodel = TransModel(model, inplace = config['options']['inplace'])
     transmodel.add_vector(my_plasmid, copy_number = copy_number)
+    transmodel.reactions.ALS.upper_bound = 0
 
     transmodel.optimize()
     print_standard_sol(transmodel)
@@ -84,4 +103,5 @@ def write_conf(model, conf):
             outputs[analysis_str] = getattr(analysis, analysis_str)(**arguments)
 
         # Check antibiotic resistance is expressed ?
-    write_conf(transmodel, config)
+        #TODO: fix
+    # write_conf(transmodel, config)

From 9d1420da587cfc686597a60c7395acc08332c389 Mon Sep 17 00:00:00 2001
From: Pierre Salvy <pierre.salvy@epfl.ch>
Date: Fri, 8 Nov 2019 11:18:02 +0100
Subject: [PATCH 19/19] ENH: DNA polymerase nucleotide info

---
 organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv | 11 ++++++++++-
 organism_data/ntseq_from_kegg.py                 |  5 ++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv b/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv
index 9d6b193..d61b892 100644
--- a/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv
+++ b/organism_data/info_ecoli/iJO1366_nt_seq_kegg.csv
@@ -1429,4 +1429,13 @@ b1717,atgccaaaaattaagaccgtacgcggtgctgctaagcgcttcaaaaaaaccggtaaaggtggttttaagcacaa
 b4506,atgaaagttcttaactctctgcgtaccgcaaaagaacgccatccagactgtcagattgtgaagcgaaaaggacggctatatgtgatttgtaaatctaatccacgttttaaggccgttcagggtcgtaagaaaaaacgttga
 b3299,atgaaagttcgtgcttccgtcaagaaattatgccgtaactgcaaaatcgttaagcgtgatggtgtcatccgtgtgatttgcagtgccgagccgaagcataaacagcgccaaggctga
 b2811,atgacaaacccgcaattcgccggacatccgttcggcacaaccgtaaccgcagaaacgttacgcaataccttcgcaccgttgacgcaatgggaagataaatatcgccagttgatcatgctggggaaacagcttccggcattgccagacgagttaaaagcgcaggctaaagagattgccggatgcgaaaaccgcgtctggctgggatatacagtggctgaaaacggcaaaatgcatttctttggcgacagcgaagggcgcattgtgcgcggcctgctggcggtgttgttgactgccgttgaggggaaaaccgccgccgagttgcaggcacagtcaccactggcattgtttgatgagctgggattacgtgcgcagcttagcgcctcacgcagccaggggttaaatgcgttaagcgaggcgattatcgctgcgacgaagcaggtttaa
-b0010,atgggcaacactaagttggctaatccggcaccgctgggcctgatgggcttcggcatgaccaccattctgcttaacctgcacaacgtgggttatttcgctctggacggtattattcttgccatgggcattttctacggcggcatcgcgcaaatttttgctggtctgctggagtacaaaaaaggcaacactttcggtttaaccgcattcacctcttacggttctttctggctgacgctggttgcgattctgctgatgccgaaactgggtctgaccgatgcgccaaatgcacagttccttggtgtctacctgggtctgtggggcgtatttacgctgtttatgttcttcggcacgctgaaaggcgcacgcgttctgcaattcgttttctttagcctgaccgtgctgtttgccctgctggcgatcggtaacattgccggtaacgccgcaatcatccactttgccggctggattgggctgatctgcggtgccagcgcaatctatctggcgatgggtgaagtactgaacgagcagtttggtcgcaccgttctgccgattggtgaatcccactaa
\ No newline at end of file
+b0010,atgggcaacactaagttggctaatccggcaccgctgggcctgatgggcttcggcatgaccaccattctgcttaacctgcacaacgtgggttatttcgctctggacggtattattcttgccatgggcattttctacggcggcatcgcgcaaatttttgctggtctgctggagtacaaaaaaggcaacactttcggtttaaccgcattcacctcttacggttctttctggctgacgctggttgcgattctgctgatgccgaaactgggtctgaccgatgcgccaaatgcacagttccttggtgtctacctgggtctgtggggcgtatttacgctgtttatgttcttcggcacgctgaaaggcgcacgcgttctgcaattcgttttctttagcctgaccgtgctgtttgccctgctggcgatcggtaacattgccggtaacgccgcaatcatccactttgccggctggattgggctgatctgcggtgccagcgcaatctatctggcgatgggtgaagtactgaacgagcagtttggtcgcaccgttctgccgattggtgaatcccactaa
+b0184,atgtctgaaccacgtttcgtacacctgcgggtgcacagcgactactcgatgatcgatggcctggccaaaaccgcaccgttggtaaaaaaggcggcggcgttgggtatgccagcactggcgatcaccgatttcaccaacctttgtggtctggtgaagttctacggagcgggacatggcgcagggattaagcctatcgtcggggcagattttaacgtccagtgcgacctgctgggtgatgagttaacccacctgacggtactggcggcgaacaataccggctatcagaatctgacgttgctgatctcaaaagcgtatcagcgcgggtacggtgccgccgggccgatcatcgatcgcgactggcttatcgaattaaacgaagggttgatccttctttccggcggacgcatgggcgacgtcggacgcagtcttttgcgtggtaacagcgcgctggtagatgagtgtgtcgcgttttatgaagaacacttcccggatcgctattttctcgagctgatccgcaccggcaggccggatgaagaaagctatctgcacgcggcggtggaactggcggaagcgcgcggtttgcccgtcgtggcgaccaacgacgtgcgctttatcgacagcagcgactttgacgcacacgaaatccgcgtcgcgatccacgacggctttaccctcgacgatcctaaacgcccgcgtaactattcgccgcagcaatatatgcgtagcgaagaggagatgtgtgagctgtttgccgacatccccgaagcccttgccaacaccgttgagatcgccaaacgctgtaacgtaaccgtgcgtcttggtgaatacttcctgccgcagttcccgaccggggacatgagcaccgaagattatctggtcaagcgtgcaaaagagggcctggaagagcgtctggcctttttattccctgatgaggaagaacgtcttaagcgccgcccggaatatgacgaacgtctggagactgaacttcaggttatcaaccagatgggcttcccgggctacttcctcatcgttatggaatttatccagtggtcgaaagataacggcgtaccggtagggccaggccgtggctccggtgcgggttcactggtggcctacgcgctgaaaatcaccgacctcgatccgctggaatttgacctgctgttcgaacgtttccttaacccggaacgtgtctccatgcctgacttcgacgttgacttctgtatggagaaacgcgatcaggttatcgagcacgtagcggacatgtacggtcgtgatgcggtatcgcagatcatcaccttcggtacaatggcggcgaaagcggtgatccgcgacgtaggccgcgtgctggggcatccgtacggctttgtcgatcgtatctcgaaactgatcccgcccgatccggggatgacgctggcgaaagcgtttgaagccgagccgcagctgccggaaatctacgaagcggatgaagaagttaaggcgctgatcgacatggcgcgcaaactggaaggggtcacccgtaacgccggtaagcacgccggtggggtggttatcgcgccgaccaaaattaccgattttgcgccgctttactgcgatgaagagggcaaacatccggtcacccagtttgataaaagcgacgttgaatacgccggactggtgaagttcgacttccttggtttgcgtacgctcaccatcatcaactgggcgctggagatgatcaacaagcggcgggcgaagaatggcgagccgccgctggatatcgctgcgatcccgctggatgataagaaaagcttcgacatgctgcaacgctcggaaaccacggcggtattccagcttgaatcgcgcggcatgaaggacctgatcaagcgtctacaacctgactgcttcgaagatatgatcgccctagtggcactgttccgccccggtccgttgcaatcagggatggtggataactttatcgaccgtaaacatggtcgtgaagagatctcctatccggacgtacagtggcagcatgaaagcctgaaaccggtactggagccaacctacggcattatcctgtatcaggaacaggtcatgcagattgcgcaggtgctttctggttataccctcggtggcgcggatatgctgcgtcgtgcgatgggtaagaaaaagccggaagagatggctaagcaacgttctgtatttgctgaaggtgcagaaaagaacggaatcaacgctgaactggcgatgaaaatcttcgacctggtggagaaattcgctggttacggatttaacaaatcgcactctgcggcctatgctttggtgtcatatcaaacgttatggctgaaagcgcactatcctgcggagtttatggcggcggtaatgaccgccgatatggacaacaccgagaaggtggtgggtctggtggatgagtgctggcggatggggctgaaaatcctgccaccagatataaactccggtctttaccatttccacgtcaacgacgacggcgaaatcgtgtatggtattggcgcgatcaaaggggtcggtgaaggtccgattgaggccatcatcgaagcccgtaataaaggcggctacttccgcgaactgtttgatctctgcgcccgtaccgacaccaaaaagttgaaccgtcgcgtgctggaaaaactgatcatgtccggggcgtttgaccgtcttgggccacatcgcgcagcgctgatgaactcgctgggcgatgcgttaaaagcggcagatcaacacgcgaaagcggaagctatcggtcaggccgatatgttcggcgtgctggccgaagagccggaacaaattgaacaatcctacgccagctgccaaccgtggccggagcaggtggtattagatggggaacgtgaaacgttaggcctgtacctgaccggacaccctatcaaccagtatttaaaagagattgagcgttatgtcggaggcgtaaggctgaaagacatgcacccgacagaacgtggtaaagtcatcacggctgcggggctcgttgttgccgcgcgggttatggtcaccaagcgcggcaatcgtatcggtatctgcacgctggatgaccgttccgggcggctggaagtgatgttgtttactgacgccctggataaataccagcaattgctggaaaaagaccgcatacttatcgtcagcggacaggtcagctttgatgacttcagcggtgggcttaaaatgaccgctcgcgaagtgatggatattgacgaagcccgggaaaaatatgctcgcgggcttgctatctcgctgacggacaggcaaattgatgaccagcttttaaaccgactccgtcagtctctggaaccccaccgctctgggacaattccagtacatctctactatcagagggcggatgcacgcgcgcggttgcgttttggcgcgacgtggcgtgtctctccgagcgatcgtttattaaacgatctccgtggcctcattggttcggagcaggtggaactggagtttgactaa
+b0215,atgagcactgcaattacacgccagatcgttctcgataccgaaaccaccggtatgaaccagattggtgcgcactatgaaggccacaagatcattgagattggtgccgttgaagtggtgaaccgtcgcctgacgggcaataacttccatgtttatctcaaacccgatcggctggtggatccggaagcctttggcgtacatggtattgccgatgaatttttgctcgataagcccacgtttgccgaagtagccgatgagttcatggactatattcgcggcgcggagttggtgatccataacgcagcgttcgatatcggctttatggactacgagttttcgttgcttaagcgcgatattccgaagaccaatactttctgtaaggtcaccgatagccttgcggtggcgaggaaaatgtttcccggtaagcgcaacagcctcgatgcgttatgtgctcgctacgaaatagataacagtaaacgaacgctgcacggggcattactcgatgcccagatccttgcggaagtttatctggcgatgaccggtggtcaaacgtcgatggcttttgcgatggaaggagagacacaacagcaacaaggtgaagcaacaattcagcgcattgtacgtcaggcaagtaagttacgcgttgtttttgcgacagatgaagagattgcagctcatgaagcccgtctcgatctggtgcagaagaaaggcggaagttgcctctggcgagcataa
+b1842,atgctgaagaatctggctaaactggatcaaacagaaatggataaagtgaatgtcgatttggcggcggccggggtggcatttaaagaacgctacaatatgccggtgatcgctgaagcggttgaacgtgaacagcctgaacatttgcgcagctggtttcgcgagcggcttattgcccaccgtttggcttcggtcaatctgtcacgtttaccttacgagcccaaacttaaataa
+b3701,atgaaatttaccgtagaacgtgagcatttattaaaaccgctacaacaggtgagcggtccgttaggtggtcgtcctacgctaccgattctcggtaatctgctgttacaggttgctgacggtacgttgtcgctgaccggtactgatctcgagatggaaatggtggcacgtgttgcgctggttcagccacacgagccaggagcgacgaccgttccggcgcgcaaattctttgatatctgccgtggtctgcctgaaggcgcggaaattgccgtgcagctggaaggtgaacggatgctggtacgctccgggcgtagccgtttttcgctgtctaccctgccagcggcggatttcccgaacctcgatgactggcagagtgaagtcgaatttaccctgccgcaggcaacgatgaagcgtctgattgaagcgacccagttttctatggcgcatcaggacgttcgctattacttaaatggtatgctgtttgaaaccgaaggtgaagaactgcgcaccgtggcaaccgacggccaccgtctggcggtctgttcaatgccaattggtcaatctttgccaagccattcggtgatcgtaccgcgtaaaggcgtgattgaactgatgcgtatgctcgacggcggcgacaatccgctgcgcgtacagattggcagcaacaacattcgcgcccacgttggcgactttatcttcacctccaaactggtggatggtcgcttcccggattatcgccgcgttctgccgaagaacccggacaaacatctggaagctggctgcgatctgctcaagcaggcgtttgctcgcgcggcgattctctctaacgagaaattccgcggcgtacgtctttatgtcagcgaaaaccagctgaaaatcaccgccaacaacccggaacaggaagaagcggaagagatcctcgacgttacctatagcggtgcggagatggaaatcggcttcaacgtcagttatgtgctggatgttctgaacgcgctgaaatgcgaaaacgtccgcatgatgctgaccgattcggtttccagcgtgcagattgaagatgcggccagccagagcgcggcttatgttgtcatgccaatgagactgtaa
+b0470,atgagttatcaggtcttagcccgaaaatggcgcccacaaacctttgctgacgtcgtcggccaggaacatgtgctgaccgcactggcgaacggcttgtcgttagggcgtattcatcatgcttatcttttttccggcacccgtggcgtcggaaaaacctctatcgcccgactgctggcgaaggggctaaactgcgaaaccggcattaccgcgacgccgtgcggcgtgtgcgataactgtcgtgaaatcgagcaggggcgctttgtcgatctgattgaaatcgacgccgcctcgcgcaccaaagttgaagatacccgcgacctgctggataacgtccagtacgctccggcgcgtggtcgtttcaaagtttatctgatcgacgaagtgcatatgctgtcgcgccacagctttaacgcactgttaaaaacccttgaagagccgccggagcacgttaagtttctgctggcgacgaccgatccacagaaattgccggtgacgattttgtcacgctgtctgcaatttcatctcaaggcgctggatgtcgagcaaattcgccatcagcttgagcacatcctcaacgaagaacatatcgctcacgagccgcgggcgctgcaattgctggcacgcgccgctgaaggcagcctgcgagatgccttaagtctgaccgaccaggcgattgccagcggtgacggccaggtttcaacccaggcggtcagtgcgatgctgggtacgcttgacgacgatcaggcgctgtcgctggttgaagcgatggtcgaggccaacggcgagcgcgtaatggcgctgattaatgaagccgctgcccgtggtatcgagtgggaagcgttgctggtggaaatgctcggcctgttgcatcgtattgcgatggtacaactttcgcctgctgcacttggcaacgacatggccgccatcgagctgcggatgcgtgaactggcgcgcaccataccgccgacggatattcagctttactatcagacgctgttgattggtcgcaaagaattaccgtatgcgccggaccgtcgcatgggcgttgagatgacgctgctgcgcgcgctggcattccatccgcgtatgccgctgcctgagccagaagtgccacgacagtcctttgcacccgtcgcgccaacggcagtaatgacgccaacccaggtgccgccgcaaccgcaatcagcgccgcagcaggcaccgactgtaccgctcccggaaaccaccagccaggtgctggcggcgcgccagcagttgcagcgcgtgcagggagcaaccaaagcaaaaaagagtgaaccggcagccgctacccgcgcgcggccggtgaataacgctgcgctggaaagactggcttcggtcaccgatcgcgttcaggcgcgtccggtgccatcggcgctggaaaaagcgccagccaaaaaagaagcgtatcgctggaaggcgaccactccggtgatgcagcaaaaagaagtggtcgccacgccgaaggcgctgaaaaaagcgctggaacatgaaaaaacgccggaactggcggcgaagctagcggcagaagccattgagcgcgacccgtgggcggcacaggtgagccaactttcgctaccaaaactggtcgaacaggtggcgttaaatgcctggaaagaggagagcgacaacgcagtatgtctgcatttgcgctcctctcagcggcatttgaacaaccgcggtgcacagcaaaaactggctgaagcgttgagcatgttaaaaggttcaacggttgaactgactatcgttgaagatgataatcccgcggtgcgtacgccgctggagtggcgtcaggcgatatacgaagaaaaacttgcgcaggcgcgcgagtccattattgcggataataatattcagaccctgcgtcggttcttcgatgcggagctggatgaagaaagtatccgccccatttga
+b0640,atgattcggttgtacccggaacaactccgcgcgcagctcaatgaagggctgcgcgcggcgtatcttttacttggtaacgatcctctgttattgcaggaaagccaggacgctgttcgtcaggtagctgcggcacaaggattcgaagaacaccacactttttccattgatcccaacactgactggaatgcgatcttttcgttatgccaggctatgagtctgtttgccagtcgacaaacgctattgctgttgttaccagaaaacggaccgaatgcggcgatcaatgagcaacttctcacactcaccggacttctgcatgacgacctgctgttgatcgtccgcggtaataaattaagcaaagcgcaagaaaatgccgcctggtttactgcgcttgcgaatcgcagcgtgcaggtgacctgtcagacaccggagcaggctcagcttccccgctgggttgctgcgcgcgcaaaacagctcaacttagaactggatgacgcggcaaatcaggtgctctgctactgttatgaaggtaacctgctggcgctggctcaggcactggagcgtttatcgctgctctggccagacggcaaattgacattaccgcgcgttgaacaggcggtgaatgatgccgcgcatttcaccccttttcattgggttgatgctttgttgatgggaaaaagtaagcgcgcattgcatattcttcagcaactgcgtctggaaggcagcgaaccggttattttgttgcgcacattacaacgtgaactgttgttactggttaacctgaaacgccagtctgcccatacgccactgcgtgcgttgtttgataagcatcgggtatggcagaaccgccggggcatgatgggcgaggcgttaaatcgcttaagtcagacgcagttacgtcaggccgtgcaactcctgacacgaacggaactcaccctcaaacaagattacggtcagtcagtgtgggcagagctggaagggttatctcttctgttgtgccataaacccctggcggacgtatttatcgacggttga
+b1099,atgagatggtatccatggttacgacctgatttcgaaaaactggtagccagctatcaggccggaagaggtcaccatgcgctactcattcaggcgttaccgggcatgggcgatgatgctttaatctacgccctgagccgttatttactctgccaacaaccgcagggccacaaaagttgcggtcactgtcgtggatgtcagttgatgcaggctggcacgcatcccgattactacaccctggctcccgaaaaaggaaaaaatacgctgggcgttgatgcggtacgtgaggtcaccgaaaagctgaatgagcacgcacgcttaggtggtgcgaaagtcgtttgggtaaccgatgctgccttactaaccgacgccgcggctaacgcattgctgaaaacgcttgaagagccaccagcagaaacttggtttttcctggctacccgcgagcctgaacgtttactggcaacattacgtagtcgttgtcggttacattaccttgcgccgccgccggaacagtacgccgtgacctggctttcacgcgaagtgacaatgtcacaggatgcattacttgccgcattgcgcttaagcgccggttcgcctggcgcggcactggcgttgtttcagggagataactggcaggctcgtgaaacattgtgtcaggcgttggcatatagcgtgccatcgggcgactggtattcgctgctagcggcccttaatcatgaacaagctccggcgcgtttacactggctggcaacgttgctgatggatgcgctaaaacgccatcatggtgctgcgcaggtgaccaatgttgatgtgccgggcctggtcgccgaactggcaaaccatctttctccctcgcgcctgcaggctatactgggggatgtttgccacattcgtgaacagttaatgtctgttacaggcatcaaccgcgagcttctcatcaccgatcttttactgcgtattgagcattacctgcaaccgggcgttgtgctaccggttcctcatctttaa
+b4259,atgaaaaacgcgacgttctaccttctggacaatgacaccaccgtcgatggcttaagcgccgttgagcaactggtgtgtgaaattgccgcagaacgttggcgcagcggtaagcgcgtgctcatcgcctgtgaagatgaaaagcaggcttaccggctggatgaagccctgtgggcgcgtccggcagaaagctttgttccgcataatttagcgggagaaggaccgcgcggcggtgcaccggtggagatcgcctggccgcaaaagcgtagcagcagccggcgcgatatattgattagtctgcgaacaagctttgcagattttgccaccgctttcacagaagtggtagacttcgttccttatgaagattctctgaaacaactggcgcgcgaacgctataaagcctaccgcgtggctggtttcaacctgaatacggcaacctggaaataa
+b4372,atgacatcccgacgagactggcagttacagcaactgggcattacccagtggtcgctgcgtcgccctggcgcgttgcagggggagattgccattgcgatcccggcacacgtccgtctggtgatggtggcaaacgatcttcccgccctgactgatcctttagtgagcgatgttctgcgcgcattaaccgtcagccccgaccaggtgctgcaactgacgccagaaaaaatcgcgatgctgccgcaaggcagtcactgcaacagttggcggttgggtactgacgaaccgctatcactggaaggcgctcaggtggcatcaccggcgctcaccgatttacgggcaaacccaacggcacgcgccgcgttatggcaacaaatttgcacatatgaacacgatttcttccctcgaaacgactga
\ No newline at end of file
diff --git a/organism_data/ntseq_from_kegg.py b/organism_data/ntseq_from_kegg.py
index eedd8e9..841ce60 100644
--- a/organism_data/ntseq_from_kegg.py
+++ b/organism_data/ntseq_from_kegg.py
@@ -19,10 +19,13 @@
                              delimiter='\t',
                              header=None)[0]
 
-transporter_genes = pd.read_csv(pjoin(data_dir,'transporters_kcats.csv'),
+transporter_genes = pd.read_csv(pjoin(data_folder,'transporters_kcats.csv'),
                                header=0, skiprows=[1,], # Units row
                                index_col=0)['gene']
 
+dnapol3_genes = pd.read_csv(pjoin(data_folder,'dnapol3_genes.csv'),
+                               header=None)
+
 all_b_genes = pd.concat([all_b_genes, rnap_genes, rrna_genes, rpeptide_genes, transporter_genes])
 all_b_genes.drop_duplicates(inplace=True)