COBREXA · exaexa · Mar 7, 2024 · Feb 2, 2024 · Feb 2, 2024 · Feb 2, 2024
diff --git a/docs/src/examples/05-enzyme-constrained-models.jl b/docs/src/examples/05-enzyme-constrained-models.jl
@@ -329,6 +329,21 @@ ec_solution = enzyme_constrained_flux_balance_analysis(
     optimizer = GLPK.Optimizer,
 )
 
+# We can notice that the objective function is a little lower than with
+# unconstrained E. coli core:
+
+ec_solution.objective
+
+# One can also observe many interesting thing, e.g. the amount of gene product
+# material required for the system to run:
+
+ec_solution.gene_product_amounts
+
+# The total amount of required gene product mass is, by default, present as
+# `total_capacity`:
+
+ec_solution.gene_product_capacity
+
 #src these values should be unique (glucose transporter is the only way to get carbon into the system)
 @test isapprox(ec_solution.objective, 0.706993382849705, atol = TEST_TOLERANCE) #src
 @test isapprox( #src
@@ -342,3 +357,37 @@ ec_solution = enzyme_constrained_flux_balance_analysis(
     0.011875920383431717, #src
     atol = TEST_TOLERANCE, #src
 ) #src
+
+# ## Simplified models
+#
+# Because most reactions typically only use a single isozyme, we may also use a
+# simplified representation of the problem where this fact is reflected, saving
+# the variable allocation for the isozymes.
+#
+# [`simplified_enzyme_constrained_flux_balance_analysis`](@ref) takes similar
+# arguments as the [`enzyme_constrained_flux_balance_analysis`](@ref), but
+# automatically chooses the "fastest" reaction isozyme for each reaction
+# direction and builds the model with that.
+
+simplified_ec_solution = simplified_enzyme_constrained_flux_balance_analysis(
+    model;
+    reaction_isozymes,
+    gene_product_molar_masses = ecoli_core_gene_product_masses,
+    capacity = total_enzyme_capacity,
+    optimizer = GLPK.Optimizer,
+)
+
+# In this case, the result is the same as with the full analysis:
+
+simplified_ec_solution.capacity_limits.total_capacity
+
+# Gene product amounts are not present in the model but are reconstructed
+# nevertheless (they are uniquely determined by the flux:
+
+simplified_ec_solution.gene_product_amounts
+
+@test isapprox( #src
+    ec_solution.objective, #src
+    simplified_ec_solution.objective, #src
+    atol = TEST_TOLERANCE, #src
+) #src
diff --git a/src/builders/enzymes.jl b/src/builders/enzymes.jl
@@ -69,9 +69,9 @@ export isozyme_flux_constraints
 """
 $(TYPEDSIGNATURES)
 
-A constraint tree that binds the isozyme amounts to gene product amounts
-accordingly to their multiplicities (aka. stoichiometries, protein units, ...)
-given by `isozyme_stoichiometry`.
+A constraint tree that computes the gene product amounts from given isozyme
+amounts their multiplicities (aka. stoichiometries, protein units, ...) given
+by `isozyme_stoichiometry`.
 
 Values in ConstraintTree `gene_product_amounts` should describe the gene
 product allocations.  Allocation for the isozyme is ignored if the gene product
@@ -85,30 +85,24 @@ and reverse-direction amounts at once. To only use a single tree, use an
 uni-tuple: `isozyme_amounts = tuple(my_tree)`.
 
 Parameter function `isozyme_stoichiometry` gets called with a reaction and
-isozyme ID as given by the isozyme amount trees. It should return `nothing` in
+isozyme IDs as given by the isozyme amount trees. It should return `nothing` in
 case there's no information -- in such case, the isozyme is *not going to be
 included* in the calculation of gene product mass.
 """
-function gene_product_isozyme_constraints(
-    gene_product_amounts::C.ConstraintTree,
-    isozyme_amounts,
-    isozyme_stoichiometry,
-)
+function isozyme_gene_product_amount_constraints(isozyme_amounts, isozyme_stoichiometry)
     res = C.ConstraintTree()
-    # This needs to invert the stoichiometry mapping,
+    # This needs to "invert" the stoichiometry mapping,
     # so we patch up a fresh new CT in place.
     for iss in isozyme_amounts
         for (rid, is) in iss
             for (iid, i) in is
                 gpstoi = isozyme_stoichiometry(rid, iid)
                 isnothing(gpstoi) && continue
                 for (gp, stoi) in gpstoi
-                    haskey(gene_product_amounts, gp) || continue
                     if haskey(res, gp)
                         res[gp].value += i.value * stoi
                     else
-                        res[gp] =
-                            C.Constraint(i.value * stoi - gene_product_amounts[gp].value, 0)
+                        res[gp] = C.Constraint(i.value * stoi)
                     end
                 end
             end
@@ -117,7 +111,7 @@ function gene_product_isozyme_constraints(
     res
 end
 
-export gene_product_isozyme_constraints
+export isozyme_gene_product_amount_constraints
 
 """
 $(TYPEDSIGNATURES)
@@ -126,8 +120,6 @@ Returns a constraint tree with enzyme capacity constraints, added for reactions
 in `fluxes_forward` and `fluxes_reverse`. This is used to construct the
 constraint system in [`enzyme_constrained_flux_balance_constraints`](@ref).
 
-Parameter `gene_ids` is an iterable collection of gene identifiers (as `Symbol`s).
-
 Parameter function `isozyme_ids` takes a reaction ID and returns `nothing` if
 the reaction does not have isozymes associated with it, or an iterable
 container of all the isozyme IDs for that reaction (as `Symbol`s).
@@ -136,25 +128,18 @@ Parameters `isozyme_forward_ids` and `isozyme_reverse_ids` can be used to
 fine-tune the generated isozymes in either direction; both default to
 `isozyme_ids`.
 
-Parameter function `gene_product_bound` can be used to generate a bound on the
-gene product amount variables (the `Symbol` key is passed as a parameter). By
-default, all amounts are bounded to be non-negative.
-
 The keys in the output constraint tree can be customized by setting
 `isozyme_forward_amounts_name`, `isozyme_reverse_amounts_name` and
 `gene_product_amounts_name`.
 """
 enzyme_variables(;
     fluxes_forward::C.ConstraintTree,
     fluxes_reverse::C.ConstraintTree,
-    gene_ids = Symbol[],
     isozyme_ids = _ -> nothing,
     isozyme_forward_ids = isozyme_ids,
     isozyme_reverse_ids = isozyme_ids,
-    gene_product_bound = _ -> C.Between(0, Inf),
     isozyme_forward_amounts_name = :isozyme_forward_amounts,
     isozyme_reverse_amounts_name = :isozyme_reverse_amounts,
-    gene_product_amounts_name = :gene_product_amounts,
 ) =
     isozyme_forward_amounts_name^isozyme_amount_variables(
         Symbol.(keys(fluxes_forward)),
@@ -163,10 +148,6 @@ enzyme_variables(;
     isozyme_reverse_amounts_name^isozyme_amount_variables(
         Symbol.(keys(fluxes_reverse)),
         isozyme_reverse_ids,
-    ) +
-    gene_product_amounts_name^C.variables(
-        keys = gene_ids,
-        bounds = gene_product_bound.(gene_ids),
     )
 
 export enzyme_variables
@@ -196,46 +177,125 @@ gene_product_ids, capacity_bound)` which are converted to a constraint
 identified by the `limit_id` that limits the total mass of `gene_product_ids`
 (which is any iterable container) by `capacity_bound`.
 """
-enzyme_constraints(;
+function enzyme_constraints(;
     fluxes_forward::C.ConstraintTree,
     fluxes_reverse::C.ConstraintTree,
     isozyme_forward_amounts::C.ConstraintTree,
     isozyme_reverse_amounts::C.ConstraintTree,
-    gene_product_amounts::C.ConstraintTree,
     kcat_forward = (_, _) -> nothing,
     kcat_reverse = (_, _) -> nothing,
     isozyme_gene_product_stoichiometry = (_, _) -> nothing,
     gene_product_molar_mass = _ -> nothing,
     capacity_limits = [],
     isozyme_flux_forward_balance_name = :isozyme_flux_forward_balance,
     isozyme_flux_reverse_balance_name = :isozyme_flux_reverse_balance,
-    gene_product_isozyme_balance_name = :gene_product_isozyme_balance,
+    gene_product_amounts_name = :gene_product_amounts,
     gene_product_capacity_name = :gene_product_capacity,
-) =
-    isozyme_flux_forward_balance_name^isozyme_flux_constraints(
-        isozyme_forward_amounts,
-        fluxes_forward,
-        kcat_forward,
-    ) *
-    isozyme_flux_reverse_balance_name^isozyme_flux_constraints(
-        isozyme_reverse_amounts,
-        fluxes_reverse,
-        kcat_reverse,
-    ) *
-    gene_product_isozyme_balance_name^gene_product_isozyme_constraints(
-        gene_product_amounts,
+)
+    gene_product_amounts = isozyme_gene_product_amount_constraints(
         (isozyme_forward_amounts, isozyme_reverse_amounts),
         isozyme_gene_product_stoichiometry,
-    ) *
-    gene_product_capacity_name^C.ConstraintTree(
-        Symbol(id) => C.Constraint(;
+    )
+
+    return isozyme_flux_forward_balance_name^isozyme_flux_constraints(
+               isozyme_forward_amounts,
+               fluxes_forward,
+               kcat_forward,
+           ) *
+           isozyme_flux_reverse_balance_name^isozyme_flux_constraints(
+               isozyme_reverse_amounts,
+               fluxes_reverse,
+               kcat_reverse,
+           ) *
+           gene_product_amounts_name^gene_product_amounts *
+           gene_product_capacity_name^C.ConstraintTree(
+               id => C.Constraint(;
+                   value = sum(
+                       gene_product_amounts[gp].value * gpmm for
+                       (gp, gpmm) in ((gp, gene_product_molar_mass(gp)) for gp in gps) if
+                       !isnothing(gpmm) && haskey(gene_product_amounts, gp)
+                   ),
+                   bound,
+               ) for (id, gps, bound) in capacity_limits
+           )
+end
+
+export enzyme_constraints
+
+"""
+$(TYPEDSIGNATURES)
+
+Like [`isozyme_gene_product_amount_constraints`](@ref), but works with the
+"simplified" view where each reaction has an uniquely determined catalytic
+isozyme, as with [`simplified_enzyme_constraints`](@ref).
+
+As the main difference, the arguments are tuples that contain first the
+constraint tree without the "isozyme" layer (i.e., fluxes), and second a
+function that returns the gene product stoichiometry and the turnover number
+(again in a tuple) for the given flux identifier.
+"""
+function simplified_isozyme_gene_product_amount_constraints(x...)
+    res = C.ConstraintTree()
+    for (iss, stoikcatfn) in x
+        for (rid, i) in iss
+            x = stoikcatfn(rid)
+            isnothing(x) && continue
+            (gpstoi, kcat) = x
+            isnothing(kcat) && continue
+            for (gp, stoi) in gpstoi
+                if haskey(res, gp)
+                    res[gp].value += i.value * stoi / kcat
+                else
+                    res[gp] = C.Constraint(i.value * stoi / kcat)
+                end
+            end
+        end
+    end
+    res
+end
+
+export simplified_isozyme_gene_product_amount_constraints
+
+"""
+$(TYPEDSIGNATURES)
+
+Build a constraint system that bounds fluxes according to their enzyme mass
+requirements, with respect to per-reaction enzyme mass costs.
+
+Parameter functions `mass_cost_forward` and `mass_cost_reverse` take a flux ID
+(corresponding to a flux in `fluxes_forward` and `fluxes_reverse`) and return
+the enzyme mass required to catalyze one "unit" of reaction in the forward or
+reverse direction, respectively. Returning `nothing` ignores the mass cost.
+
+`capacity_limits` is an iterable container of triples `(limit_id, flux_ids,
+bound)`, which creates the capacity bounds over groups of fluxes (in the same
+manner as for gene products in [`enzyme_constraints`](@ref)).
+"""
+function simplified_enzyme_constraints(;
+    fluxes_forward::C.ConstraintTree,
+    fluxes_reverse::C.ConstraintTree,
+    mass_cost_forward = _ -> nothing,
+    mass_cost_reverse = _ -> nothing,
+    capacity_limits = [],
+)
+    function contribution(fl, cost, id)
+        c = cost(id)
+        (isnothing(c) || !haskey(fl, id)) && return zero(C.LinearValue)
+        return c * fl[id].value
+    end
+
+    return C.ConstraintTree(
+        id => C.Constraint(;
             value = sum(
-                gene_product_amounts[gp].value * gpmm for
-                (gp, gpmm) in ((gp, gene_product_molar_mass(gp)) for gp in gps) if
-                !isnothing(gpmm)
+                contribution(fluxes_forward, mass_cost_forward, f) for f in fs;
+                init = zero(C.LinearValue),
+            ) + sum(
+                contribution(fluxes_reverse, mass_cost_reverse, f) for f in fs;
+                init = zero(C.LinearValue),
             ),
             bound,
-        ) for (id, gps, bound) in capacity_limits
+        ) for (id, fs, bound) in capacity_limits
     )
+end
 
-export enzyme_constraints
+export simplified_enzyme_constraints