working version, debug needed

COBREXA · Mar 6, 2024 · a4b6cce · a4b6cce
1 parent 06021f1
commit a4b6cce
Show file tree

Hide file tree

Showing 3 changed files with 208 additions and 147 deletions.
diff --git a/docs/src/examples/05-enzyme-constrained-models.jl b/docs/src/examples/05-enzyme-constrained-models.jl
@@ -329,6 +329,21 @@ ec_solution = enzyme_constrained_flux_balance_analysis(
     optimizer = GLPK.Optimizer,
 )
 
+# We can notice that the objective function is a little lower than with
+# unconstrained E. coli core:
+
+ec_solution.objective
+
+# One can also observe many interesting thing, e.g. the amount of gene product
+# material required for the system to run:
+
+ec_solution.gene_product_amount
+
+# The total amount of required gene product mass is, by default, present as
+# `total_capacity`:
+
+ec_solution.gene_product_capacity
+
 #src these values should be unique (glucose transporter is the only way to get carbon into the system)
 @test isapprox(ec_solution.objective, 0.706993382849705, atol = TEST_TOLERANCE) #src
 @test isapprox( #src
@@ -343,10 +358,16 @@ ec_solution = enzyme_constrained_flux_balance_analysis(
     atol = TEST_TOLERANCE, #src
 ) #src
 
-# ## Running a simplified enzyme constrained model
-# We can also simplify the normal enzyme constrained model, by only considering
-# the fastest isozyme, hence obviating the need to create variables for each
-# isozyme, and the actual gene products.
+# ## Simplified models
+#
+# Because most reactions typically only use a single isozyme, we may also use a
+# simplified representation of the problem where this fact is reflected, saving
+# the variable allocation for the isozymes.
+#
+# [`simplified_enzyme_constrained_flux_balance_analysis`](@ref) takes similar
+# arguments as the [`enzyme_constrained_flux_balance_analysis`](@ref), but
+# automatically chooses the "fastest" reaction isozyme for each reaction
+# direction and builds the model with that.
 
 simplified_ec_solution = simplified_enzyme_constrained_flux_balance_analysis(
     model;
@@ -356,6 +377,15 @@ simplified_ec_solution = simplified_enzyme_constrained_flux_balance_analysis(
     optimizer = GLPK.Optimizer,
 )
 
+# In this case, the result is the same as with the full analysis:
+
+simplified_ec_solution.capacity_limits.total_capacity
+
+# Gene product amounts are not present in the model but are reconstructed
+# nevertheless (they are uniquely determined by the flux:
+
+simplified_ec_solution.gene_product_amounts
+
 @test isapprox( #src
     ec_solution.objective, #src
     simplified_ec_solution.objective, #src

diff --git a/src/builders/enzymes.jl b/src/builders/enzymes.jl
@@ -69,9 +69,9 @@ export isozyme_flux_constraints
 """
 $(TYPEDSIGNATURES)
 
-A constraint tree that binds the isozyme amounts to gene product amounts
-accordingly to their multiplicities (aka. stoichiometries, protein units, ...)
-given by `isozyme_stoichiometry`.
+A constraint tree that computes the gene product amounts from given isozyme
+amounts their multiplicities (aka. stoichiometries, protein units, ...) given
+by `isozyme_stoichiometry`.
 
 Values in ConstraintTree `gene_product_amounts` should describe the gene
 product allocations.  Allocation for the isozyme is ignored if the gene product
@@ -85,30 +85,24 @@ and reverse-direction amounts at once. To only use a single tree, use an
 uni-tuple: `isozyme_amounts = tuple(my_tree)`.
 
 Parameter function `isozyme_stoichiometry` gets called with a reaction and
-isozyme ID as given by the isozyme amount trees. It should return `nothing` in
+isozyme IDs as given by the isozyme amount trees. It should return `nothing` in
 case there's no information -- in such case, the isozyme is *not going to be
 included* in the calculation of gene product mass.
 """
-function gene_product_isozyme_constraints(
-    gene_product_amounts::C.ConstraintTree,
-    isozyme_amounts,
-    isozyme_stoichiometry,
-)
+function isozyme_gene_product_amount_constraints(isozyme_amounts, isozyme_stoichiometry)
     res = C.ConstraintTree()
-    # This needs to invert the stoichiometry mapping,
+    # This needs to "invert" the stoichiometry mapping,
     # so we patch up a fresh new CT in place.
     for iss in isozyme_amounts
         for (rid, is) in iss
             for (iid, i) in is
                 gpstoi = isozyme_stoichiometry(rid, iid)
                 isnothing(gpstoi) && continue
                 for (gp, stoi) in gpstoi
-                    haskey(gene_product_amounts, gp) || continue
                     if haskey(res, gp)
                         res[gp].value += i.value * stoi
                     else
-                        res[gp] =
-                            C.Constraint(i.value * stoi - gene_product_amounts[gp].value, 0)
+                        res[gp] = C.Constraint(i.value * stoi, 0)
                     end
                 end
             end
@@ -117,7 +111,7 @@ function gene_product_isozyme_constraints(
     res
 end
 
-export gene_product_isozyme_constraints
+export isozyme_gene_product_amount_constraints
 
 """
 $(TYPEDSIGNATURES)
@@ -126,8 +120,6 @@ Returns a constraint tree with enzyme capacity constraints, added for reactions
 in `fluxes_forward` and `fluxes_reverse`. This is used to construct the
 constraint system in [`enzyme_constrained_flux_balance_constraints`](@ref).
 
-Parameter `gene_ids` is an iterable collection of gene identifiers (as `Symbol`s).
-
 Parameter function `isozyme_ids` takes a reaction ID and returns `nothing` if
 the reaction does not have isozymes associated with it, or an iterable
 container of all the isozyme IDs for that reaction (as `Symbol`s).
@@ -136,25 +128,18 @@ Parameters `isozyme_forward_ids` and `isozyme_reverse_ids` can be used to
 fine-tune the generated isozymes in either direction; both default to
 `isozyme_ids`.
 
-Parameter function `gene_product_bound` can be used to generate a bound on the
-gene product amount variables (the `Symbol` key is passed as a parameter). By
-default, all amounts are bounded to be non-negative.
-
 The keys in the output constraint tree can be customized by setting
 `isozyme_forward_amounts_name`, `isozyme_reverse_amounts_name` and
 `gene_product_amounts_name`.
 """
 enzyme_variables(;
     fluxes_forward::C.ConstraintTree,
     fluxes_reverse::C.ConstraintTree,
-    gene_ids = Symbol[],
     isozyme_ids = _ -> nothing,
     isozyme_forward_ids = isozyme_ids,
     isozyme_reverse_ids = isozyme_ids,
-    gene_product_bound = _ -> C.Between(0, Inf),
     isozyme_forward_amounts_name = :isozyme_forward_amounts,
     isozyme_reverse_amounts_name = :isozyme_reverse_amounts,
-    gene_product_amounts_name = :gene_product_amounts,
 ) =
     isozyme_forward_amounts_name^isozyme_amount_variables(
         Symbol.(keys(fluxes_forward)),
@@ -163,10 +148,6 @@ enzyme_variables(;
     isozyme_reverse_amounts_name^isozyme_amount_variables(
         Symbol.(keys(fluxes_reverse)),
         isozyme_reverse_ids,
-    ) +
-    gene_product_amounts_name^C.variables(
-        keys = gene_ids,
-        bounds = gene_product_bound.(gene_ids),
     )
 
 export enzyme_variables
@@ -196,94 +177,125 @@ gene_product_ids, capacity_bound)` which are converted to a constraint
 identified by the `limit_id` that limits the total mass of `gene_product_ids`
 (which is any iterable container) by `capacity_bound`.
 """
-enzyme_constraints(;
+function enzyme_constraints(;
     fluxes_forward::C.ConstraintTree,
     fluxes_reverse::C.ConstraintTree,
     isozyme_forward_amounts::C.ConstraintTree,
     isozyme_reverse_amounts::C.ConstraintTree,
-    gene_product_amounts::C.ConstraintTree,
     kcat_forward = (_, _) -> nothing,
     kcat_reverse = (_, _) -> nothing,
     isozyme_gene_product_stoichiometry = (_, _) -> nothing,
     gene_product_molar_mass = _ -> nothing,
     capacity_limits = [],
     isozyme_flux_forward_balance_name = :isozyme_flux_forward_balance,
     isozyme_flux_reverse_balance_name = :isozyme_flux_reverse_balance,
-    gene_product_isozyme_balance_name = :gene_product_isozyme_balance,
+    gene_product_amounts_name = :gene_product_amounts,
     gene_product_capacity_name = :gene_product_capacity,
-) =
-    isozyme_flux_forward_balance_name^isozyme_flux_constraints(
-        isozyme_forward_amounts,
-        fluxes_forward,
-        kcat_forward,
-    ) *
-    isozyme_flux_reverse_balance_name^isozyme_flux_constraints(
-        isozyme_reverse_amounts,
-        fluxes_reverse,
-        kcat_reverse,
-    ) *
-    gene_product_isozyme_balance_name^gene_product_isozyme_constraints(
-        gene_product_amounts,
+)
+    gene_product_amounts = isozyme_gene_product_amount_constraints(
         (isozyme_forward_amounts, isozyme_reverse_amounts),
         isozyme_gene_product_stoichiometry,
-    ) *
-    gene_product_capacity_name^C.ConstraintTree(
-        Symbol(id) => C.Constraint(;
-            value = sum(
-                gene_product_amounts[gp].value * gpmm for
-                (gp, gpmm) in ((gp, gene_product_molar_mass(gp)) for gp in gps) if
-                !isnothing(gpmm)
-            ),
-            bound,
-        ) for (id, gps, bound) in capacity_limits
     )
 
+    return isozyme_flux_forward_balance_name^isozyme_flux_constraints(
+               isozyme_forward_amounts,
+               fluxes_forward,
+               kcat_forward,
+           ) *
+           isozyme_flux_reverse_balance_name^isozyme_flux_constraints(
+               isozyme_reverse_amounts,
+               fluxes_reverse,
+               kcat_reverse,
+           ) *
+           gene_product_amounts_name^gene_product_amounts *
+           gene_product_capacity_name^C.ConstraintTree(
+               id => C.Constraint(;
+                   value = sum(
+                       gene_product_amounts[gp].value * gpmm for
+                       (gp, gpmm) in ((gp, gene_product_molar_mass(gp)) for gp in gps) if
+                       !isnothing(gpmm) && haskey(gene_product_amounts, gp)
+                   ),
+                   bound,
+               ) for (id, gps, bound) in capacity_limits
+           )
+end
+
 export enzyme_constraints
 
 """
 $(TYPEDSIGNATURES)
 
-Returns a constraint tree, built from `constraints`, with simplified enzyme
-constraints added based on reactions in `fluxes`.
+Like [`isozyme_gene_product_amount_constraints`](@ref), but works with the
+"simplified" view where each reaction has an uniquely determined catalytic
+isozyme, as with [`simplified_enzyme_constraints`](@ref).
 
-Inputs:
-- `fastest_isozyme_forward` and `fastest_isozyme_reverse` are functions that
-  take a reaction ID and return the smallest MW/kcat for all isozymes that can
-  catalyze the reaction.
-- `capacity_limits` is an iterable container of triples `(capacity_id,
-    reaction_ids, capacity_bound)`, which creates capacity bound(s).
+As the main difference, `isozyme_amounts` is not supposed to contain a subtree
+of isozymes for each reaction, and `isozyme_stoichiometry` accordingly expects
+only a single identifier parameter for the reaction ID.
 """
-function simplified_enzyme_constraints(
-    constraints::C.ConstraintTree;
-    fluxes = constraints.fluxes,
-    fastest_isozyme_forward = _ -> 0.0,
-    fastest_isozyme_reverse = _ -> 0.0,
+function simplified_isozyme_gene_product_amount_constraints(
+    isozyme_amounts_stoichiometry_kcat,
+)
+    res = C.ConstraintTree()
+    for (iss, stoikcatfn) in isozyme_amounts
+        for (rid, i) in iss
+            x = stoikcatfn(rid)
+            isnothing(x) && continue
+            gpstoi, kcat = x
+            isnothing(kcat) && continue
+            for (gp, stoi) in gpstoi
+                if haskey(res, gp)
+                    res[gp].value += i.value * stoi / kcat
+                else
+                    res[gp] = C.Constraint(i.value * stoi / kcat, 0)
+                end
+            end
+        end
+    end
+    res
+end
+
+export simplified_isozyme_gene_product_amount_constraints
+
+"""
+$(TYPEDSIGNATURES)
+
+Build a constraint system that bounds fluxes according to their enzyme mass
+requirements, with respect to per-reaction enzyme mass costs.
+
+Parameter functions `mass_cost_forward` and `mass_cost_reverse` take a flux ID
+(corresponding to a flux in `fluxes_forward` and `fluxes_reverse`) and return
+the enzyme mass required to catalyze one "unit" of reaction in the forward or
+reverse direction, respectively. Returning `nothing` ignores the mass cost.
+
+`capacity_limits` is an iterable container of triples `(limit_id, flux_ids,
+bound)`, which creates the capacity bounds over groups of fluxes (in the same
+manner as for gene products in [`enzyme_constraints`](@ref)).
+"""
+function simplified_enzyme_constraints(;
+    fluxes_forward::C.ConstraintTree,
+    fluxes_reverse::C.ConstraintTree,
+    mass_cost_forward = _ -> nothing,
+    mass_cost_reverse = _ -> nothing,
     capacity_limits = [],
 )
+    function contribution(fl, cost, rid)
+        c = cost(rid)
+        isnothing(c) && !haskey(fl, rid) && return zero(C.LinearValue)
+        return c * fl[rid]
+    end
 
-    # allocate variables for everything (nb. += wouldn't associate right here)
-    constraints =
-        constraints +
-        sign_split_variables(fluxes; positive = :fluxes_forward, negative = :fluxes_reverse)
-
-    # connect all parts with constraints
-    constraints *
-    :directional_flux_balance^sign_split_constraints(
-        positive = constraints.fluxes_forward,
-        negative = constraints.fluxes_reverse,
-        signed = constraints.fluxes,
-    ) *
-    :capacity_bounds^C.ConstraintTree(
-        Symbol(id) => C.Constraint(
+    return C.ConstraintTree(
+        id => C.Constraint(;
             value = sum(
-                C.value(constraints.fluxes_forward[rid]) * fastest_isozyme_forward(rid)
-                for rid in flxs
+                contribution(fluxes_forward, mass_cost_forward, i) for i in fluxes;
+                init = zero(C.LinearValue),
             ) + sum(
-                C.value(constraints.fluxes_reverse[rid]) * fastest_isozyme_reverse(rid)
-                for rid in flxs
+                contribution(fluxes_reverse, mass_cost_reverse, i) for i in fluxes;
+                init = zero(C.LinearValue),
             ),
-            bound = C.Between(0, limit),
-        ) for (id, flxs, limit) in capacity_limits
+            bound = bound,
+        ) for (id, fluxes, bound) in capacity_limits
     )
 end