# helpers_EFM_FBA.py
import copy
import os
from fractions import Fraction
from itertools import product  # needed by igen() below
import cbmpy as cbm
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
from matplotlib import cm
import matplotlib.cm as mplcm
import matplotlib.colors as colors
import matplotlib
from scipy.optimize import linprog
from ecmtool import extract_sbml_stoichiometry, get_conversion_cone
from ecmtool.helpers import to_fractions, unique
from ecmtool.network import add_reaction_tags # was updated in v.0.1.6, was add_debug_tags
# Set matplotlib params such that text is editable text
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
def findConstraintMetabolites(nzrc_dictionaries, intermediate_cmod):
"""
    Checks which external metabolite is coupled to each constrained reaction. If no external metabolite is coupled,
    the reaction is stored in reactions_to_tag, indicating that a virtual metabolite should be added to track this
    constrained reaction using conversions
:param nzrc_dictionaries: list of dictionaries
Dictionary with 4 entries: (reaction id, reduced cost, obj/sec_obj/cons, value of bound that was hit)
:param intermediate_cmod: CBMPy-model object
metabolic model
:return nzrc_dictionaries: list of dictionaries
We have added to the dictionaries 5) species-ID of metabolite that is coupled to the constrained reaction
6) Stoichiometric coefficient with which this species is coupled to the constrained reaction
:return reactions_to_tag: list of reaction-IDs
"""
reactions_to_tag = []
for nzrc_ind, nzrc_dict in enumerate(nzrc_dictionaries):
re = intermediate_cmod.getReaction(nzrc_dict['rid']) # Get corresponding reaction
re_stoich = re.getStoichiometry()
# Try to find external species used in the reaction
found_external = False
for stoich_pair in re_stoich:
species = intermediate_cmod.getSpecies(stoich_pair[1])
if species.compartment in ['e', 'Extra_organism']:
nzrc_dictionaries[nzrc_ind]['ext_metab'] = stoich_pair[1]
nzrc_dictionaries[nzrc_ind]['ext_metab_stoich'] = stoich_pair[0]
found_external = True
break
# If there are two externals in the reaction, we only need to track one of them
# If not we add the reaction to the list reactions_to_tag. The name of the virtual metabolite is now based on
# what is used in ECMtool. I don't know if this can be done more generically
# TODO: Try to find a more generic way
if not found_external:
print('No external is found. This reaction is tagged with a virtual external metabolite')
rea_id = nzrc_dict['rid']
reactions_to_tag.append(rea_id)
nzrc_dictionaries[nzrc_ind]['ext_metab'] = 'virtual_tag_%s' % rea_id
nzrc_dictionaries[nzrc_ind]['ext_metab_stoich'] = 1
return nzrc_dictionaries, reactions_to_tag
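# Example usage (illustrative sketch only; the SBML path below is hypothetical):
#     cmod = cbm.readSBML3FBC('path/to/model.xml')
#     cbm.doFBA(cmod)
#     nzrc_dicts, n_obj = get_nzrc(cmod)
#     nzrc_dicts, reactions_to_tag = findConstraintMetabolites(nzrc_dicts, cmod)
# After this call, each dictionary also holds 'ext_metab' and 'ext_metab_stoich', and reactions_to_tag lists the
# reactions that need a virtual external metabolite before ECM enumeration.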
def get_info_objectives_constraints(nzrc_dictionaries, intermediate_cmod):
"""
:param nzrc_dictionaries: list of dictionaries
Dictionary with 4 entries: (reaction id, reduced cost, obj/sec_obj/cons, value of bound that was hit)
5) species-ID of metabolite that is coupled to the constrained reaction 6) Stoichiometric coefficient
with which this species is coupled to the constrained reaction
:param intermediate_cmod: CBMPy model object
:return infos_obj: list of dictionaries
Contains the same info as nzrc_dictionaries but only about the objective and the secondary objectives
:return infos_cons: list of dictionaries
Contains the same info as nzrc_dictionaries but only about the constraints
"""
infos_obj = []
infos_cons = []
active_obj_rid = intermediate_cmod.getActiveObjectiveReactionIds()[0]
    # TODO: Make the objective metabolite name 'objective' more generic. It is now based on knowing what ECMtool adds
# First create dictionary with information about the objective function.
obj_dict = {'rid': active_obj_rid, 'nzrc': np.nan, 'obj_cons': 'obj', 'flux_val': np.nan, 'ext_metab': 'objective',
'ext_metab_stoich': 1.0}
infos_obj.append(obj_dict)
# Then separate the dictionaries in nzrc_dictionaries into objective dictionaries and constrained dictionaries
    for nzrc_dict in nzrc_dictionaries:
        if nzrc_dict['obj_cons'] == 'cons':
            infos_cons.append(nzrc_dict)
        else:
            infos_obj.append(nzrc_dict)
return infos_obj, infos_cons
def delete_non_active_network(original_cmod, which_zeros='flux_zero', zero_tol=1e-9, opt_tol=1e-8, exceptions=[]):
"""
Deletes reactions of SBML-model if zero in FBA-solution.
Checks if the objective function is really not changed due to the deletions.
:param original_cmod: cbmpy.CBModel
An FBA should already have been performed
:param which_zeros: string
Determines which reactions are deleted. Options
'flux_zero' deletes all reactions that have zero flux in the optimum
'FVA_zero' deletes only reactions that have zero flux in all FVA solutions
:param zero_tol: float
Determines when a reaction is zero enough
:param opt_tol: float
We consider the objective function as not changing if the change is less than this value
:param exceptions: list
Reactions that will not be deleted.
:return cmod: CBMPy-model
This model was reduced in size by deleting reactions and metabolites that were not active
"""
delete_reaction = [] # Initializes list for reactions that need to be deleted
opt_obj = original_cmod.getObjFuncValue() # Needed for checking if we did not remove too much
zero_tol = zero_tol * opt_obj
deletion_success = False
counter = 0
N_ITER = 10
    if which_zeros == 'FVA_zero':
cbm.doFVA(original_cmod)
cmod = original_cmod.clone()
# if which_zeros is 'FVA_zero':
# cbm.doFVA(cmod)
while not deletion_success: # Runs until we have thrown away only reactions that do not affect the objective
print("This is round", counter, "of", N_ITER)
for rid in cmod.getReactionIds():
reaction = cmod.getReaction(rid)
if abs(reaction.value) <= zero_tol:
                if which_zeros == 'flux_zero':
delete_reaction.append(rid)
                elif which_zeros == 'FVA_zero':
if max(np.abs([reaction.fva_max, reaction.fva_min])) <= zero_tol:
delete_reaction.append(rid)
# delete all reactions in delete_reaction from model
for rid in delete_reaction:
if rid not in exceptions:
cmod.deleteReactionAndBounds(rid)
cbm.doFBA(cmod)
changed_obj = abs(cmod.getObjFuncValue() - opt_obj)
if changed_obj <= opt_tol: # Check if objective value didn't change
deletion_success = True
print("Reaction deletion succeeded.")
else:
cmod = original_cmod.clone()
            zero_tol = zero_tol / 10  # If we have removed too much: adjust zero_tol and try again
delete_reaction = []
if counter <= N_ITER:
counter += 1
else:
print("Reaction deletion did not succeed within %d iterations." % N_ITER)
break
if deletion_success:
# Then delete metabolites that are no longer used by any reaction
stoich_matrix = cmod.N.array
n_reacs_per_metab = np.count_nonzero(stoich_matrix, axis=1)
inactive_metabs = [cmod.species[metab_index].id for metab_index in range(stoich_matrix.shape[0]) if
n_reacs_per_metab[metab_index] == 0]
for metab_id in inactive_metabs:
print('Deleting %s because it is not used in active network.' % metab_id)
cmod.deleteSpecies(metab_id)
return cmod
else:
return None
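# Example usage (sketch; assumes an FBA was already run on the model; the path and exception ID are hypothetical):
#     cmod = cbm.readSBML3FBC('path/to/model.xml')
#     cbm.doFBA(cmod)
#     active_cmod = delete_non_active_network(cmod, which_zeros='flux_zero', exceptions=['R_ATPM'])
# The function returns a pruned clone containing only active reactions and metabolites, or None if no pruning was
# found that leaves the objective value unchanged.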
def create_KO_models(original_cmod, n_KOs=3, zero_tol=1e-6, opt_tol=1e-8):
"""
    Creates a series of knock-out (KO) models. Starting from the FBA solution of the original model, active
    non-exchange reactions are knocked out one by one; a knock-out is accepted if the model still reaches at least
    1% of the original objective value. For each accepted knock-out, a reduced model is built that contains only
    the reactions that were active in the original solution or in one of the knock-out solutions.
    :param original_cmod: cbmpy.CBModel
        An FBA should already have been performed
    :param n_KOs: int
        Maximal number of successive knock-outs that are tried
    :param zero_tol: float
        Determines when a reaction flux is considered zero
    :param opt_tol: float
        We consider the objective function as not changing if the change is less than this value
    :return cmod_list: list of cbmpy.CBModel
"""
active_reaction_lists = []
KOs_done = []
opt_obj = original_cmod.getObjFuncValue()
obj_id = original_cmod.obj_func.fluxObjectives[0].reaction
print(opt_obj)
zero_tol = zero_tol * opt_obj
# Get reactions that are active in first model
active_reacs = [rid for rid in original_cmod.getReactionIds() if
abs(original_cmod.getReaction(rid).value) > zero_tol]
active_reaction_lists.append(active_reacs)
KO_candidates = [rid for rid in active_reacs if not rid[:4] == 'R_EX' and not rid == obj_id]
for KO_ind in range(n_KOs):
if not len(KO_candidates):
break
test_cmod = original_cmod.clone()
KO_success = False
counter = 0
while not KO_success:
KOs_to_do = KOs_done + [KO_candidates[counter]]
for KO_to_do in KOs_to_do:
test_cmod.deleteReactionAndBounds(KO_to_do)
cbm.doFBA(test_cmod)
if test_cmod.getObjFuncValue() > 0.01 * opt_obj:
KO_success = True
else:
test_cmod = original_cmod.clone()
counter += 1
if counter == len(KO_candidates):
break
if KO_success:
active_reacs_KO = [rid for rid in test_cmod.getReactionIds() if
abs(test_cmod.getReaction(rid).value) > zero_tol]
active_reaction_lists.append(list(set(active_reaction_lists[-1]) | set(active_reacs_KO)))
KOs_done = KOs_to_do
KO_candidates = list(set(KO_candidates).intersection(set(active_reacs_KO)))
# Now make models with only the active reactions in the list above
cmod_list = []
for model_ind in range(1, len(active_reaction_lists)):
active_rids = active_reaction_lists[model_ind]
new_model = original_cmod.clone()
delete_reactions = [rid for rid in new_model.getReactionIds() if rid not in active_rids]
for rid in delete_reactions:
new_model.deleteReactionAndBounds(rid)
cbm.doFBA(new_model)
if new_model.SOLUTION_STATUS == 'LPS_OPT':
new_model.buildStoichMatrix()
stoich_matrix = new_model.N.array
n_reacs_per_metab = np.count_nonzero(stoich_matrix, axis=1)
inactive_metabs = [new_model.species[metab_index].id for metab_index in range(stoich_matrix.shape[0]) if
n_reacs_per_metab[metab_index] == 0]
for metab_id in inactive_metabs:
print('Deleting %s because it is not used in active network.' % metab_id)
new_model.deleteSpecies(metab_id)
new_model.buildStoichMatrix()
            print('objective value of model is %f' % new_model.getObjFuncValue())
cmod_list.append(new_model)
else:
print("FBA of this model didn't result in optimal solution; model is skipped.")
return cmod_list
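# Example usage (sketch; the SBML path is hypothetical):
#     cmod = cbm.readSBML3FBC('path/to/model.xml')
#     cbm.doFBA(cmod)
#     ko_models = create_KO_models(cmod, n_KOs=2)
# Each returned model contains the union of the reactions that were active in the original FBA solution and in the
# solutions obtained after the successive knock-outs.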
def get_nzrc(cmod):
"""
    Gets all reactions with non-zero reduced costs (nzrc). A reaction has an nzrc if the objective function value
    changes when the bound on this reaction changes. We only consider reactions with an nzrc and a flux value not
    equal to 0; the reactions excluded this way are irreversible reactions that would be favourable if they were
    reversible, which we do not want to consider. We also round the nzrc values to 10 decimals; this prevents very
    small nzrcs from popping up that won't be present in the active network or active FVA network.
    We note if the nzrc is due to a fixed objective flux (e.g. ATP-maintenance) or due to an actual constraint
:returns nzrc_dictionaries: list of dictionaries
Dictionary with 4 entries: (reaction id, reduced cost, obj/sec_obj/cons, value of bound that was hit)
:returns n_objectives: int
Shows how many objectives there are in the problem. Certainly 1 (objective function), but can be more if
there are fluxes that should have some value. Reducing these values would increase the objective flux.
:param cmod: cbmpy.CBModel
An FBA should already have been performed
"""
nzrc_dictionaries = []
# We only consider reactions with nzrcs with flux value not equal to 0.
# The following finds tuples of reaction ids and their reduced costs.
non_zero_reduced_cost_pairs = [(rid, cmod.getReaction(rid).reduced_cost) for rid in
cmod.getReactionIds() if
round(abs(cmod.getReaction(rid).reduced_cost), 10) > 0] # round to 10 decimals #15?
# TODO: This does not work with more general constraints than flux bound yet.
n_objectives = 1
# We assume that we at least have one objective, but lower bounds on positive fluxes can be seen as secondary
    # objectives, since the model needs to satisfy this bound as cheaply as possible
for nzrc_pair in non_zero_reduced_cost_pairs:
rid = nzrc_pair[0]
nzrc = nzrc_pair[1]
flux_value = cmod.getReaction(rid).getValue()
if not flux_value == 0.: # We don't consider reactions that are zero.
if nzrc_pair[1] * flux_value <= 0: # An extra optimisation: to make this flux as cheap as possible
n_objectives += 1
obj_cons = 'sec_obj'
else: # A constraint
obj_cons = 'cons'
nzrc_dict = {'rid': rid, 'nzrc': nzrc, 'obj_cons': obj_cons, 'flux_val': flux_value}
nzrc_dictionaries.append(nzrc_dict)
return nzrc_dictionaries, n_objectives
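# Shape of the returned information (sketch; the reaction IDs and numbers below are made up for illustration):
#     nzrc_dictionaries = [
#         {'rid': 'R_EX_glc__D_e', 'nzrc': -0.09, 'obj_cons': 'cons', 'flux_val': -10.0},
#         {'rid': 'R_ATPM', 'nzrc': 0.002, 'obj_cons': 'sec_obj', 'flux_val': 8.4},
#     ]
# findConstraintMetabolites() later extends each dictionary with 'ext_metab' and 'ext_metab_stoich'.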
def get_network_class(file_path, reactions_to_tag=[], use_external_compartment=None):
"""
:return network: network class
:param file_path: string
String with path to the file.
:param reactions_to_tag: list of strings
Strings with reaction IDs that should be tagged with a virtual metabolite
"""
    # Step 1: function from ECMtool that builds the network class
network = extract_sbml_stoichiometry(file_path, determine_inputs_outputs=True,
use_external_compartment=use_external_compartment)
# Find indices of reactions that should be tagged
indices_to_tag = []
if len(reactions_to_tag) > 0:
for rid in reactions_to_tag:
ind_to_tag = [ind for ind, reaction in enumerate(network.reactions) if reaction.id == rid]
indices_to_tag.append(ind_to_tag[0])
add_reaction_tags(network, reactions=indices_to_tag) # Function from ECMtool that adds the virtual metabolites
return network
def calc_ECMs(file_path, reactions_to_tag=[], print_results=False, hide_metabs=[], use_external_compartment=None,
only_rays=False):
"""
Calculates ECMs using ECMtool
:return ecms: np.array
This array contains the ECMs as columns and the metabolites as rows
:param file_path: string
String with path to the SBML-file.
:param reactions_to_tag: list with strings
List with reaction-IDs of reactions that need to be tagged
:param print_results: Boolean
    :param use_external_compartment: string or None (default None)
        If a string is given, it is used to detect external metabolites
    :param hide_metabs: indices of metabolites that should be ignored
    :param only_rays: Boolean
        Passed on to ECMtool's get_conversion_cone; if True, only the extreme rays of the conversion cone are enumerated
"""
    # Step 1: build the network
network = extract_sbml_stoichiometry(file_path, determine_inputs_outputs=True,
use_external_compartment=use_external_compartment)
indices_to_tag = []
if len(reactions_to_tag) > 0:
for rid in reactions_to_tag:
ind_to_tag = [ind for ind, reaction in enumerate(network.reactions) if reaction.id == rid]
indices_to_tag += ind_to_tag
add_reaction_tags(network, reactions=indices_to_tag)
if len(hide_metabs) > 0:
network.hide(hide_metabs)
full_network = copy.deepcopy(network)
orig_N = network.N
    # Step 2: compress the network
if print_results:
print("\n", "Compressing network")
network.compress(verbose=True, cycle_removal=False)
    # Step 3: enumerate the ECMs
# TODO: add timer on enumerating ECMs. tic toc?
if print_results:
print("\n", "Enumerating ECMs")
cone = network.uncompress(
get_conversion_cone(network.N, network.external_metabolite_indices(), network.reversible_reaction_indices(),
network.input_metabolite_indices(), network.output_metabolite_indices(),
only_rays=only_rays,
verbose=True))
if print_results:
print_ECMs(cone, indices_to_tag, network, orig_N, add_objective_metabolite=True, check_feasibility=True)
cone = cone.transpose() # columns will be the different ECMs, rows are metabolites
return cone, full_network
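# Example usage (sketch; the SBML path and tagged reaction ID are hypothetical):
#     ecms_matrix, full_network = calc_ECMs('path/to/model.xml', reactions_to_tag=['R_ATPM'], print_results=True)
# ecms_matrix has metabolites as rows (in the order of full_network.metabolites) and ECMs as columns.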
def normalize_to_row(matrix, row_ind, not_normalized_yet):
"""
:param matrix: np.array
Matrix that should be normalized
:param row_ind: int
Row that should be normalized to
:param not_normalized_yet: list of ints
List of column indices that still need normalization
:return: matrix: np.array
Normalized matrix
:return: not_normalized_yet: list of ints
Updated list of column indices that still need normalization
"""
obj_row = matrix[row_ind, :]
div_factors = [Fraction(1, 1)] * matrix.shape[1] # By default, divide by 1
normalized_indices = []
ecms_to_be_normalized = []
    # Find those columns that are not yet normalized, but can be normalized using this row
for col_ind, ecm_ind in enumerate(not_normalized_yet):
if obj_row[ecm_ind] != 0:
div_factors[ecm_ind] = abs(obj_row[ecm_ind]) # If column can be normalized, divide by the obj_row-value
normalized_indices.append(col_ind)
ecms_to_be_normalized.append(ecm_ind)
div_factors = np.array(div_factors)[ecms_to_be_normalized]
divisor_matrix = np.tile(div_factors, (matrix.shape[0], 1))
matrix[:, ecms_to_be_normalized] = np.divide(matrix[:, ecms_to_be_normalized], divisor_matrix)
not_normalized_yet = np.delete(not_normalized_yet, normalized_indices)
return matrix, not_normalized_yet
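# Worked example with made-up numbers: normalizing to row 0 divides every column with a nonzero entry in row 0 by the
# absolute value of that entry, so
#     [[2, 0, -4],        [[1, 0, -1],
#      [6, 3,  8]]  --->   [3, 3,  2]]
# and only the middle column (zero in row 0) remains in not_normalized_yet.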
def igen(a, n, m):
"""
    Generator that splits up a np.array into smaller np.arrays of size n x m, with the remainder in smaller
    np.arrays.
    :param a: np.array
        matrix that will be split into smaller matrices
    :param n: int
        number of rows per block
    :param m: int
        number of columns per block
"""
i_ = np.arange(a.shape[0]) // n
j_ = np.arange(a.shape[1]) // m
for i, j in product(np.unique(i_), np.unique(j_)):
yield (i, j), a[i_ == i][:, j_ == j]
def matrix_splitter(matrix, split_num):
    """
    Splits a matrix column-wise into a dictionary of sub-matrices with at most split_num columns each.
    :param matrix: np.array
    :param split_num: int
        maximal number of columns per sub-matrix
    :return dict_matrix: dict of np.arrays
    """
    col_divider = list(range(matrix.shape[1]))
dict_col_divider = dict()
for i in range(int(np.ceil(matrix.shape[1] / split_num))):
dict_col_divider[i] = col_divider[i * split_num: (i + 1) * split_num]
dict_matrix = dict()
for i in dict_col_divider.keys():
dict_matrix[i] = matrix[:, dict_col_divider[i]]
return dict_matrix
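# Example with a made-up shape: splitting a matrix with 7 columns using split_num=3 gives
#     dict_matrix[0] -> columns 0-2, dict_matrix[1] -> columns 3-5, dict_matrix[2] -> column 6.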
def normalize_ECMS(ecms_matrix, network, normalization_order=[]):
"""
    Normalizes ECMs to the first objective. If there are also lower bounds that act as a kind of secondary
    objective, the ECMs with zero objective are then normalized to this secondary objective.
:return ecms_matrix: np.array
This array contains the normalized ECMs as columns and the metabolites as rows
:param ecms_matrix:
This array contains the ECMs as columns and the metabolites as rows
:param network: Network class
Network class as comes from ECMtool
:param normalization_order: list of metab-IDs
List of ordered metabolite-IDs
"""
MATRIX_SPLIT = 100000
    # Determine an order for normalizing the metabolites, if none is given
    # Metabolites that occur in many ECMs are picked first for normalization. To compare two ECM results, make
    # sure to pick the same normalization order
if not len(normalization_order):
normalization_order = determine_normalization_order(ecms_matrix, network)
not_normalized_yet = list(range(ecms_matrix.shape[1])) # This tracks which ECMs need to be normalized still
zero_cols = np.where(np.count_nonzero(ecms_matrix, axis=0) == 0)[0]
not_normalized_yet = np.delete(not_normalized_yet, zero_cols)
    # If the number of ECM columns exceeds MATRIX_SPLIT, split the matrix into smaller subsets.
    if ecms_matrix.shape[1] > MATRIX_SPLIT:  # 100000
        # split matrix into smaller subsets
        print('Splitting ECMs matrix, because the number of ECMs is higher than ' + str(MATRIX_SPLIT) + ': ' + str(
            ecms_matrix.shape[1]))
        print('This takes time.')
        matrix_dict = matrix_splitter(ecms_matrix, MATRIX_SPLIT)
        print('ECMs matrix is split into ' + str(len(matrix_dict)) + ' parts.')
# Create dict to keep track of which ECMs need to be normalized still.
dict_not_normalized_yet = dict()
for i in matrix_dict.keys():
dict_not_normalized_yet[i] = list(range(matrix_dict[i].shape[1]))
# Normalize ECMs
for key in matrix_dict.keys():
print('Normalizing matrix ' + str(key) + ' of ' + str(len(matrix_dict)))
# Then normalize all ECMs to one of the metabolites
for metab in normalization_order:
print('Normalizing with ' + metab + ' because ' + str(
len(dict_not_normalized_yet[key])) + ' ecms are not yet normalized.')
if not len(dict_not_normalized_yet[key]):
break
metab_index = [index for index, met in enumerate(network.metabolites) if met.id == metab][0]
matrix_dict[key], dict_not_normalized_yet[key] = normalize_to_row(matrix_dict[key], metab_index,
dict_not_normalized_yet[key])
if len(dict_not_normalized_yet[key]) == 0:
break
# If all ECMs are normalized, i.e. all lists in dict_not_normalized_yet are empty,
# then normalization is finished; normalized ECMs matrix will be returned
        print('Normalization is done, split matrices will be concatenated and returned.')
        # Combine the split matrices into one again.
normalized_ecms_matrix = np.concatenate(
[matrix_dict[i] for i in list(range(len(matrix_dict)))],
axis=1)
return normalized_ecms_matrix
else:
# Then normalize all ECMs to one of the metabolites
for metab in normalization_order:
print('Normalizing with ' + metab + ' because ' + str(
len(not_normalized_yet)) + ' ecms are not yet normalized.')
if not len(not_normalized_yet):
break
metab_index = [index for index, met in enumerate(network.metabolites) if met.id == metab][0]
ecms_matrix, not_normalized_yet = normalize_to_row(ecms_matrix, metab_index, not_normalized_yet)
            # If all ECMs are normalized, we can stop normalizing and return the normalized ecms_matrix
            if len(not_normalized_yet) == 0:
                return ecms_matrix
        return ecms_matrix  # Fallback: return the (partly) normalized matrix if the order did not cover all ECMs
def normalize_ECMS_objective_first(ecms_matrix, network, infos_obj, verbose=True):
"""
    Normalizes ECMs to the first objective. If there are also lower bounds that act as a kind of secondary
    objective, the ECMs with zero objective are then normalized to this secondary objective.
:return ecms_matrix: np.array
This array contains the normalized ECMs as columns and the metabolites as rows
:param ecms_matrix:
This array contains the ECMs as columns and the metabolites as rows
:param network: Network class
Network class as comes from ECMtool
:param infos_obj: list of dictionaries
Dictionaries with information about the different objective functions
"""
# We first normalize for the objective metabolite
objective_metab = [info_dict['ext_metab'] for info_dict in infos_obj if np.isnan(info_dict['flux_val'])][0]
# Then for the metabolites that correspond to another objective (often a lower bound on a certain flux)
    secondary_objectives = [info_dict['ext_metab'] for info_dict in infos_obj if
                            info_dict['ext_metab'] != objective_metab]
# Then for all other metabolites
tertiary_objectives = []
tertiary_objectives_usage = []
for metab_ind, metab in enumerate(network.metabolites):
        if metab.id != objective_metab and metab.id not in secondary_objectives:
tertiary_objectives.append(metab.id) # Store all other metabolite ids
tertiary_objectives_usage.append(
np.count_nonzero(ecms_matrix[metab_ind, :])) # And their number of occurrences in ECMs
# Order the tertiary objectives for how often they are used
tertiary_objectives = [x for (y, x) in
sorted(zip(tertiary_objectives_usage, tertiary_objectives), key=lambda pair: pair[0],
reverse=True)]
if verbose:
print('Normalizing in the following order:')
print(objective_metab)
print(*secondary_objectives, sep=', ')
print(*tertiary_objectives, sep=', ')
# First normalize everything that can be normalized for the real objective
ecms_matrix = normalize_ECMS(ecms_matrix, network, [objective_metab] + secondary_objectives + tertiary_objectives)
return ecms_matrix
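# Example usage (sketch; assumes ecms_matrix, full_network and infos_obj were produced by calc_ECMs and
# get_info_objectives_constraints above):
#     ecms_matrix = normalize_ECMS_objective_first(ecms_matrix, full_network, infos_obj)
# Each ECM column is scaled such that the entry of the metabolite it was normalized to has absolute value 1.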
def calc_EFMs(network, result_dir, verbose=True):
"""
    Calculates EFMs using EFMtool in Matlab. Saves a csv-file with the EFMs as columns.
    IMPORTANT: This function will only work if the MATLAB engine for Python has been installed following the steps on:
    https://nl.mathworks.com/help/matlab/matlab_external/install-the-matlab-engine-for-python.html
    :return efms_df: Pandas dataframe
        This dataframe contains the EFMs as columns and the reactions as rows
    :param network: Network class
        Network-object as used in ECMtool for which EFMs should be calculated
    :param result_dir: string
        Directory where the stoichiometric matrix and reversibilities are written so that Matlab can read them
"""
import matlab.engine
# Add your own folder here where efmtool is located
# efmtool_folder = "C:\\Users\\Eunice van Pelt\\Desktop\\whole-cell-model\\eunice\\Daan\\efmtool"
efmtool_folder = "C:\\Users\\Daan\\surfdrive\\PhD\\Software\\efmtool"
# Use the same stoichiometric matrix as used for the ECM-calculation
stoich_matrix = network.N
n_orig_reacs = len(network.reactions)
# Find reaction names in the order used in the network class
reac_names = [reac.id for reac in network.reactions]
# Find the same reversibilities as used for the ECM-calculation
reversibilities = [1 if reac.reversible else 0 for reac in network.reactions]
# Add an exchange reaction for external metabolites, otherwise no steady-state can be obtained
for ind_metab, metab in enumerate(network.metabolites):
if metab.is_external:
# Determine reversibilities of the exchange reaction, based on information if the metab
# is an input, output or both
reversible = 1 if metab.direction == 'both' else 0
# Create new column (reaction) in the stoichiometric matrix
col = to_fractions(np.zeros(shape=(stoich_matrix.shape[0], 1)))
col[ind_metab, 0] = -1 if metab.direction == 'output' else 1
stoich_matrix = np.append(stoich_matrix, col, axis=1)
reversibilities.append(reversible)
# if metab.is_external:
# reversible = 0
# if metab.direction in ['input','both']:
# col = to_fractions(np.zeros(shape=(stoich_matrix.shape[0], 1)))
# col[ind_metab, 0] = 1
# stoich_matrix = np.append(stoich_matrix, col, axis=1)
# reversibilities.append(reversible)
# if metab.direction in ['output', 'both']:
# col = to_fractions(np.zeros(shape=(stoich_matrix.shape[0], 1)))
# col[ind_metab, 0] = -1
# stoich_matrix = np.append(stoich_matrix, col, axis=1)
# reversibilities.append(reversible)
stoich_matrix = stoich_matrix.astype(dtype=float) # Matlab doesn't deal with the python Fractions
# Save the stoichiometric matrix and reversibilities in the result directory, so that Matlab can use it
np.savetxt(os.path.join(result_dir, "stoich_matrix.csv"), stoich_matrix, delimiter=",")
np.savetxt(os.path.join(result_dir, "reversibilities.csv"), reversibilities, delimiter=",")
engine = matlab.engine.start_matlab()
# Uses our own Matlab-function to set EFMtool to the task of calculating EFMs
result = engine.calculate_efms_wo_SBML(efmtool_folder, result_dir)
# print("matlab result \n", result)
size_efms = result.size
# print(size_efms)
# The following is a fast way of importing large arrays from Matlab
efms_matrix = np.reshape(np.array(result._data), size_efms, order='F')
    # Crop virtual exchange reactions off
efms_matrix = efms_matrix[0:n_orig_reacs, :]
# Create dataframe with EFMs as the columns, and reactions_names as the rows
efms_df = pd.DataFrame(efms_matrix, index=reac_names)
engine.quit()
return efms_df
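# Example usage (sketch; requires a local MATLAB installation with the Python engine, EFMtool, and the
# calculate_efms_wo_SBML.m helper on the MATLAB path; the result directory is hypothetical):
#     efms_df = calc_EFMs(full_network, result_dir='results/efm_run')
# efms_df has the reaction IDs as its index and one column per EFM.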
def plot_costs(model_dict, infos_obj, infos_cons, cons_IDs=[], obj_val=0.33, show_active=True, result_dir=None,
vector_focus=True, squared_plot=True):
"""
    Plots the costs of all ECMs in cost_df in the cost-vector figure, and shows ECM usage in the FBA solution
    with vectors.
:param result_dir: directory path
Directory for storing the figures
:param model_dict: dictionary
Dictionary with all information about one of the models that we are considering
:param infos_obj: list
List of dictionaries concerning the different objectives
:param infos_cons: list
List of dictionaries concerning the different constraints
:param cons_IDs: list of cons_IDs
List of strings reflecting the different constraints that should be plotted
:param obj_val: float
Objective value needed for potentially rescaling the plot
:param show_active: Boolean
Show or not show ECM usage
:param vector_focus: Boolean
Focus on vectors or show all ECMs
:param squared_plot: Boolean
Results in squared subplots with equal x and y upper limit.
"""
# Some important numbers
n_objectives = len(infos_obj)
scatter_active = 100
alpha_active = 0.5
scatter_normal = 15
quiverwidth = 0.01
# If the objective is really large, we will show the costs to produce more than one unit objective. We use the
# scaling factor to calculate the new costs
scale_obj_val = np.floor(np.log2(obj_val))
scaling_factor = 2 ** scale_obj_val
# Find the indices of the ECMs that are active in the FBA solution
actECMs_inds = list(model_dict['table_cons_df'].loc[model_dict['table_cons_df']['active'] > 0]['orig_ECM_number'])
# Make sure that we always have two constraints for the plot
cons_IDs = select_cons_IDs(infos_cons, cons_IDs)
# Create one figure with n subplots, where n is the number of 'objectives'. A lower bound that should be met is also
# considered an objective here.
if n_objectives > 3:
        fig, axes = plt.subplots(int(np.ceil(n_objectives / 3)), 3, figsize=(7, 6))
quiverwidth = quiverwidth * 3
else:
fig, axes = plt.subplots(1, n_objectives, figsize=(7, 4))
if n_objectives == 1:
axes = [axes] # Necessary evil
# We loop over the subplots, i.e., the number of objectives
for index, objective_dict in enumerate(infos_obj):
if n_objectives > 3:
            ax = axes[int(index / 3)][index % 3]
else:
ax = axes[index]
plt.sca(ax)
obj_string = objective_dict['ext_metab']
# In the x_ulims and y_ulims we will store a number of candidates for the upper bound of the plot. We will
# eventually take the maximum of these candidates
x_ulims = []
y_ulims = []
x_llims = []
y_llims = []
# Make cost table with only the constraints that are to be shown
cost_df, cons_indices = get_cost_df(model_dict, infos_cons, cons_IDs, index, objective_dict,
scaling_factor=scaling_factor)
if show_active and 'active' not in cost_df.columns:
print("Can't show active EFMs if this information is not provided.")
return
x_label = cons_IDs[0]
y_label = cons_IDs[1]
# Determine the bounds for the axes along which we want to plot the costs. It is a bit complicated, but
# works
if objective_dict['obj_cons'] == 'obj':
x_llims.append(-0.05)
x_ulims.append(max(1.1, min(5 * scaling_factor / obj_val, max(cost_df.values[:, 0]) * 1.1)))
if not vector_focus:
x_ulims.append(max(cost_df[cons_IDs[0]]) * 1.1)
x_llims.append(min(0, min(cost_df[cons_IDs[0]])))
y_llims.append(-0.05)
y_ulims.append(max(1.1, min(5 * scaling_factor / obj_val, max(cost_df.values[:, 1]) * 1.1)))
if not vector_focus:
y_ulims.append(max(cost_df[cons_IDs[1]]) * 1.1)
y_llims.append(min(0, min(cost_df[cons_IDs[1]])))
else:
x_ulims.append(1.1)
y_ulims.append(1.1)
x_llims.append(-0.05)
if not vector_focus:
x_ulims.append(max(cost_df[cons_IDs[0]]) * 1.1)
x_llims.append(min(0, min(cost_df[cons_IDs[0]])))
y_llims.append(-0.05)
if not vector_focus:
y_ulims.append(max(cost_df[cons_IDs[1]]) * 1.1)
y_llims.append(min(0, min(cost_df[cons_IDs[1]])))
if 'virtual' in cons_IDs:
y_ulims = [0.3]
y_llims = [-0.3]
# Plot cost dots of normal ECMs
cost_df.plot.scatter(x=x_label, y=y_label, color='grey', ax=ax,
s=scatter_normal, label="ECM") # model_dict['model_name']) #, legend=False)
# Plot cost dots of active ECMs
actECMs = cost_df.loc[cost_df['orig_ECM_number'].isin(actECMs_inds)]
# The number of active ECMs for this objective can be lower than the number of active ECMs in total, because
# we cannot plot the costs for an ECM that does not contribute anything to this objective.
n_actECMs_curr_obj = len(actECMs)
# Here we will store the x,y-length of the cost vectors if multiplied by its activity
ecm_usage = np.zeros((n_actECMs_curr_obj, 2))
# x-length
ecm_usage[:, 0] = actECMs.values[:, cons_indices[0]] * actECMs['active']
# y-length
ecm_usage[:, 1] = actECMs.values[:, cons_indices[1]] * actECMs['active']
# Construct two columns of starting positions for the vectors for the vector addition with which we show the
# usage of the ECMs. The end point of the first vector is the starting point of the second vector, etc.
quiver_start = ecm_usage.cumsum(axis=0)
# Append starting point (0,0) of the first vector
quiver_start = np.append(np.zeros((1, 2)), quiver_start, axis=0)
# Delete last row because this is actually the end point of the last vector
quiver_start = quiver_start[:-1, :]
# Add the x,y-lengths of the vectors as two additional columns
ecm_usage = np.append(quiver_start, ecm_usage, axis=1)
# The following thus are the x,y-begin positions of the vectors, and the x,y-length
Xusage, Yusage, Uusage, Vusage = zip(*ecm_usage)
# Define colors in cmap
# We use gray for normal non-active ECMs, and a different colour for each active ECM
# Get the indices of the ECMs that contribute to this objective, needed for getting the right colour
        curr_act_inds = [counter for counter, ind in enumerate(actECMs_inds) if ind in actECMs['orig_ECM_number'].values]
if len(actECMs_inds) > 20:
cmap1 = cm.get_cmap('tab20b', 20)
cmap2 = cm.get_cmap('tab20c', len(actECMs_inds) - 20)
cmap_colors = np.concatenate((cmap1.colors, cmap2.colors), axis=0)
cmap_curr = cmap_colors[curr_act_inds, :]
elif len(actECMs_inds) < 11:
cmap = cm.get_cmap('tab10', len(actECMs_inds))
cmap_curr = cmap.colors[curr_act_inds, :]
else: # if 10 < len(actECMs_inds) < 21
cmap = cm.get_cmap('tab20', len(actECMs_inds))
cmap_curr = cmap.colors[curr_act_inds, :]
# Plot the costs per unit objective of the active ECMs
actECMs.plot(kind='scatter', x=x_label, y=y_label, color=cmap_curr, ax=ax,
s=scatter_active, alpha=alpha_active,
label='Active ECM') # , legend=True, label=[str(i) for i in actECMs_inds])
# Set dimensions of the plot and configure axes
# Also we make a light grey "constraint-box". The allowed region for solutions should be in this box.
if vector_focus:
ax.set(adjustable='box', aspect='equal')
y_ulim = max(y_ulims)
x_ulim = max(x_ulims)
x_llim = min(x_llims)
y_llim = min(y_llims)
if squared_plot:
x_ulim = y_ulim = max(x_ulim, y_ulim)
ax.plot([1, 1], [y_llim, y_ulim], '--', color='grey', linewidth=2.0)
ax.set_xlim([x_llim, x_ulim])
ax.set_ylim([y_llim, y_ulim])
# plt.xlabel('Needed fraction of constraint: ' + x_label)
ax.set(xlabel='')
if 'virtual' in cons_IDs:
ax.axes.get_yaxis().set_visible(False)
ax.axhline(y=0, color='grey', linewidth=2.0)
else:
ax.set(ylabel='')
# plt.ylabel('Needed fraction of constraint: ' + y_label)
ax.plot([0, 1], [0, 1], '-.', color='grey')
ax.axes.get_yaxis().set_visible(True)
ax.plot([x_llim, x_ulim], [1, 1], '--', color='grey', linewidth=2.0)
# We change the fontsize of minor ticks label
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)
# Plot the usage of ECMs as vectors
for i in range(len(Xusage)):
color = cmap_curr[i, :]
ax.quiver(Xusage[i], Yusage[i], Uusage[i], Vusage[i], pivot='tail', angles='xy', scale_units='xy',
linestyle='--', color=color, scale=1, width=quiverwidth, zorder=3.)
# Set title of objective plot
if objective_dict['obj_cons'] == 'obj':
if n_objectives < 3:
ax.set_title("Fraction needed per %.2f " % scaling_factor + obj_string, size=10)
else:
ax.set_title("Fraction needed \n per %.2f: " % scaling_factor + obj_string, size=8)
# plt.title("Fraction needed per %.2f:\n" % scaling_factor + obj_string)
else:
# Get name of demanded reaction
if obj_string == "virtual_tag_R_ATPM":
obj_string_name = "ATP maintenance"
elif obj_string.startswith("virtual_tag_"):
obj_string_name = model_dict['model'].getReaction(obj_string.replace("virtual_tag_", "")).getName()
else: # external metabolite
obj_string_name = model_dict['model'].getSpecies(obj_string).getName()
# Set title of demanded reaction
if n_objectives < 3:
ax.set_title("Fraction needed for \n demanded: " + obj_string_name, size=10)
else:
ax.set_title("Fraction needed for \n demanded: " + obj_string_name, size=8)
# plt.title("Fraction needed for demanded:\n" + obj_string)
# Set legend of axes invisible
ax.get_legend().set_visible(False)
# get legend handles and labels to create shared legend
handles, labels = ax.get_legend_handles_labels()
# Create common x and y axis labels
fig.add_subplot(111, frame_on=False)
plt.tick_params(labelcolor="none", bottom=False, left=False)
plt.xlabel('Needed fraction of constraint: ' +
[met.name for met in model_dict['network'].metabolites if met.id == x_label][0])
    if 'virtual' not in cons_IDs:
ylabel_name = [met.name for met in model_dict['network'].metabolites if met.id == y_label][0]
if "O2" in ylabel_name:
ylabel_name = "Oxygen"
plt.ylabel(ylabel='Needed fraction of constraint: ' + ylabel_name)
# Create shared legend based on latest ax handles and labels
fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.), loc='lower center',
borderaxespad=0., ncol=2,
frameon=False, prop={'size': 10})
plt.subplots_adjust(bottom=0.15, hspace=0.70)
if result_dir:
fig.savefig(
os.path.join(result_dir,
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".png"),
bbox_inches="tight")
fig.savefig(
os.path.join(result_dir,
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".pdf"),
bbox_inches="tight")
fig.savefig(
os.path.join(result_dir,
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".svg"),
bbox_inches="tight")
else:
fig.savefig(
os.path.join(os.getcwd(),
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".png"),
bbox_inches="tight")
fig.savefig(
os.path.join(os.getcwd(),
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".pdf"),
bbox_inches="tight")
fig.savefig(
os.path.join(os.getcwd(),
"cost_plot" + model_dict['model_name'] + "_" + cons_IDs[0] + "_" + cons_IDs[1] + ".svg"),
bbox_inches="tight")
def plot_costs_flux(model_dict, infos_obj, infos_cons, cons_IDs=[], obj_val=0.33, show_active=True, result_dir=None,
vector_focus=True, squared_plot=True):
"""
    Plots the costs of all ECMs in cost_df in the cost-vector figure, and shows ECM usage in the FBA solution
    with vectors.
:param result_dir: directory path
Directory for storing the figures
:param model_dict: dictionary
Dictionary with all information about one of the models that we are considering
:param infos_obj: list
List of dictionaries concerning the different objectives
:param infos_cons: list
List of dictionaries concerning the different constraints
:param cons_IDs: list of cons_IDs
List of strings reflecting the different constraints that should be plotted
:param obj_val: float
Objective value needed for potentially rescaling the plot
:param show_active: Boolean
Show or not show ECM usage
:param vector_focus: Boolean
Focus on vectors or show all ECMs
:param squared_plot: Boolean
Results in squared subplots with equal x and y upper limit.
"""
# Some important numbers
n_objectives = len(infos_obj)
scatter_active = 100
alpha_active = 0.5
scatter_normal = 15
quiverwidth = 0.01
# If the objective is really large, we will show the costs to produce more than one unit objective. We use the
# scaling factor to calculate the new costs
scale_obj_val = np.floor(np.log2(obj_val))
scaling_factor = 2 ** scale_obj_val
# Find the indices of the ECMs that are active in the FBA solution
actECMs_inds = list(model_dict['table_cons_df'].loc[model_dict['table_cons_df']['active'] > 0]['orig_ECM_number'])
# Make sure that we always have two constraints for the plot
cons_IDs = select_cons_IDs(infos_cons, cons_IDs)
    cons_IDs = ['M_glc__D_e', 'M_o2_e']  # Hard-coded: this flux-based plot always uses glucose and oxygen as the two constraint axes
# Create one figure with n subplots, where n is the number of 'objectives'. A lower bound that should be met is also
# considered an objective here.
if n_objectives > 3:
        fig, axes = plt.subplots(int(np.ceil(n_objectives / 3)), 3, figsize=(7, 6))
quiverwidth = quiverwidth * 3
else:
fig, axes = plt.subplots(1, n_objectives, figsize=(7, 4))
if n_objectives == 1:
axes = [axes] # Necessary evil
# We loop over the subplots, i.e., the number of objectives
for index, objective_dict in enumerate(infos_obj):
if n_objectives > 3:
            ax = axes[int(index / 3)][index % 3]
else:
ax = axes[index]
plt.sca(ax)
obj_string = objective_dict['ext_metab']
# In the x_ulims and y_ulims we will store a number of candidates for the upper bound of the plot. We will
# eventually take the maximum of these candidates
x_ulims = []
y_ulims = []
x_llims = []
y_llims = []
# Make cost table with only the constraints that are to be shown
cost_df, cons_indices = get_cost_df(model_dict, infos_cons, cons_IDs, index, objective_dict,
scaling_factor=scaling_factor, use_flux_bounds=False)
if show_active and 'active' not in cost_df.columns:
print("Can't show active EFMs if this information is not provided.")
return
x_label = cons_IDs[0]
y_label = cons_IDs[1]
# Determine the bounds for the axes along which we want to plot the costs. It is a bit complicated, but
# works
if objective_dict['obj_cons'] == 'obj':
x_llims.append(-0.05)
# x_ulims.append(max(1.1, min(5 * scaling_factor / obj_val, max(cost_df.values[:, 0]) * 1.1)))
x_ulims.append(20)