from __future__ import print_function
import aesara as theano
import aesara.tensor as T
import numpy as np
from aesara.tensor.nlinalg import pinv
from aesara.tensor.basic import diag
from aesara.tensor.nlinalg import eigh
from aesara.tensor.slinalg import eigvalsh
from aesara.ifelse import ifelse
try:
FunctionType = theano.compile.function_module.Function
except AttributeError:
FunctionType = theano.compile.function.types.Function
def sym_solve(A, b):
# TODO: update to sym from gen solve once that becomes functional.
return T.slinalg.solve(A, b, assume_a='gen')
class IPM:
"""Solve nonlinear, nonconvex optimization problems using an interior-point method.
Detailed Description:
This solver uses a line search primal-dual interior-point method to solve
problems of the form
min f(x) subject to {ce(x) = 0} and {ci(x) >= 0}
x
where f(x) is a function of the weights x, {ce(x) = 0} is the set of M
    equality constraints, and {ci(x) >= 0} is the set of N inequality
constraints. The solver finds a solution to an 'unconstrained'
transformation of the problem by forming the Lagrangian augmented by a
barrier term with coefficient mu when N > 0:
L(x, s, lda) = f - ce.dot(lda[:M]) - (ci-s).dot(lda[M:]) - mu * sum(log(s))
where s >= 0 are the slack variables (only used when N > 0) that transform
the inequality constraints into equality constraints and lda are the
Lagrange multipliers where lda[M:] >= 0. The optimization completes when
the first-order Karush-Kuhn-Tucker (KKT) conditions are satisfied to the
desired precision.
For more details on this algorithm, consult the references at the bottom.
In particular, this algorithm uses a line search interior-point
algorithm with a merit function based on Ch. 19 of [1].
Dependencies:
Required:
NumPy
SciPy
            Aesara (formerly Theano; imported here as theano)
Optional:
Intel MKL, OpenBlas, ATLAS, or BLAS/LAPACK
Nvidia's CUDA or OpenCL
                for more details on support for GPU software, see the Aesara
                documentation.
Examples:
For example usage of this solver class, see the problem definitions in
the main function at the bottom of this file, pyipm.py. Each of these
example problems can be run from the command line using argv input. For
example, if one wants to run example problem 3, enter this command into
the terminal from the directory that contains pyipm.py
python pyipm.py 3
    and the solver will print the solution to screen. There are 10 example
    problems that are called by numbers in the range 1 through 10.
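
    A minimal construction sketch (the problem below is hypothetical and the
    variable names are illustrative, not part of the API):

        import aesara.tensor as T
        import numpy as np

        x_dev = T.vector('x_dev')          # symbolic weights (size D=3)
        f = T.sum((x_dev - 2.0) ** 2)      # objective to minimize
        ci = x_dev - 1.0                   # inequality constraints ci(x) >= 0

        problem = IPM(x0=np.zeros(3), x_dev=x_dev, f=f, ci=ci)
        x, s, lda, fval, kkt = problem.solve()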
Input types:
symbolic expression: refers to equations constructed using Theano
objects and syntax.
symbolic scalar/array: is the output of a symbolic expression.
Class Variables:
x0 (NumPy array): weight initialization (size D).
x_dev (Theano tensor): symbolic weight variables.
f (symbolic expression): objective/cost function; i.e. the function to be
minimized.
[Args] x_dev
[Returns] symbolic scalar
[NOTE] see note 1) below.
df (symbolic expression, OPTIONAL): gradient of the objective
function with respect to (wrt) x_dev.
[Args] x_dev
[Default] df is assigned through automatic symbolic differentiation
of f wrt x_dev
[Returns] symbolic array (size D)
[NOTE] see note 1) below.
d2f (symbolic expression, OPTIONAL): hessian of the objective function wrt
x_dev.
[Args] x_dev
[Default] d2f is assigned through automatic symbolic differentiation
of f wrt x_dev
[Returns] symbolic array (size DxD)
[NOTE] see notes 1) and 3) below.
ce (Theano expression, OPTIONAL): symbolic expression for the equality
constraints as a function of x_dev. This is required if dce or
d2ce is not None.
[Args] x_dev
[Default] None
[Returns] symbolic array (size M)
[NOTE] see note 1) below.
dce (Theano expression, OPTIONAL): symbolic expression for the Jacobian
of the equality constraints wrt x_dev.
[Args] x_dev
[Default] if ce is not None, then dce is assigned through automatic
symbolic differentiation of ce wrt x_dev; otherwise None.
[Returns] symbolic array (size DxM)
[NOTE] see notes 1) and 2) below.
d2ce (Theano expression, OPTIONAL): symbolic expression for the Hessian
of the equality constraints wrt x_dev and lambda_dev (see below).
[Args] x_dev, lambda_dev
[Default] if ce is not None, then d2ce is assigned through automatic
symbolic differentiation of ce wrt x_dev; otherwise None.
[Returns] symbolic array (size DxD)
[NOTE] see notes 1) and 3) below.
ci (Theano expression, OPTIONAL): symbolic expression for the
inequality constraints as a function of x_dev. Required if dci or
d2ci are not None.
[Args] x_dev
[Default] None
[Returns] symbolic array (size N)
[NOTE] see note 1) below.
dci (Theano expression, OPTIONAL): symbolic expression for the Jacobian
of the inequality constraints wrt x_dev.
[Args] x_dev
[Default] if ci is not None, then dci is assigned through automatic
symbolic differentiation of ci wrt x_dev; otherwise None
[Returns] symbolic array (size DxN)
[NOTE] see notes 1) and 2) below.
d2ci (Theano expression, OPTIONAL): symbolic expression for the Hessian
of the inequality constraints wrt x_dev and lambda_dev (see below).
[Args] x_dev, lambda_dev
            [Default] if ci is not None, then d2ci is assigned through automatic
symbolic differentiation of ci wrt x_dev; otherwise None
[Returns] symbolic array (size DxD)
[NOTE] see notes 1) and 3) below.
lda0 (NumPy array, OPTIONAL): Lagrange multiplier initialization (size
M+N). For equality constraints, lda0[:M] may take on any sign while
for inequality constraints all elements of lda0[M:] must be >=0.
[Default] if ce or ci is not None, then lda0 is initialized using dce
(if ce is not None), dci (if ci is not None), and df all evaluated
at x0 and the Moore-Penrose pseudoinverse; otherwise None
    lambda_dev (Theano expression, OPTIONAL): symbolic Lagrange multipliers.
        This is only required if you supply your own input for d2ce or d2ci.
[Default] None
s0 (NumPy array, OPTIONAL): slack variables initialization (size N).
These are only set when inequality constraints are in use. The
        slack variables must be s0 >= 0.
[Default] if ci is not None, then s0 is set to the larger of ci
evaluated at x0 or Ktol; otherwise None
mu (float, OPTIONAL): barrier parameter initialization (scalar>0).
[Default] 0.2
nu (float, OPTIONAL): merit function parameter initialization (scalar>0).
[Default] 10.0
rho (float, OPTIONAL): factor used for testing and updating nu (scalar
in (0,1)).
[Default] 0.1.
tau (float, OPTIONAL): fraction-to-the-boundary rule and backtracking
line search parameter (scalar in (0,1)).
[Default] 0.995
eta (float, OPTIONAL): Armijo rule parameter (Wolfe conditions) (scalar
in (0,1)).
[Default] 1.0E-4
beta (float, OPTIONAL): power factor used in Hessian regularization to
combat ill-conditioning. This is only relevant if ce is not None.
[Default] 0.4
miter (int, OPTIONAL): number of 'inner' iterations where mu is held
constant.
[Default] 20
niter (int, OPTIONAL): number of 'outer' iterations where mu is
adjusted.
[Default] 10
Xtol (float, OPTIONAL): weight precision tolerance (used only in
fraction-to-the-boundary rule).
[Default] np.finfo(float_dtype).eps (machine precision of
float_dtype; see below)
Ktol (float, OPTIONAL): convergence tolerance on the Karush-Kuhn-
Tucker (KKT) conditions.
[Default] 1.0E-4
Ftol (float, OPTIONAL): convergence tolerance on the change in f
between iterations. For constrained problems, f is measured after
each outer iteration and compared to the prior outer iteration.
[Default] None (when set to None, convergence is determined via the
KKT conditions alone).
lbfgs (integer, OPTIONAL): solve using the L-BFGS approximation of the
Hessian; set lbfgs to a positive integer equal to the number of
past iterations to store. If lbfgs=0 or False, the exact Hessian
will be used.
[Default] False
lbfgs_zeta (float, OPTIONAL): initialize the scaling of the initial
Hessian approximation with respect to the weights. The initial
approximation is lbfgs_zeta multiplied by a DxD identity matrix.
This must be a positive scalar.
[Default] 1.0
float_dtype (dtype, OPTIONAL): set the universal precision of all float
variables.
[Default] np.float64 (64-bit floats)
[NOTE] Using 32-bit precision is not recommended; the numerical
inaccuracy can lead to slow convergence.
verbosity (integer, OPTIONAL): screen output level from -1 to 3 where -1
is no feedback and 3 is maximum feedback.
[Default] 1
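
    As an illustration, several of these options can be overridden at
    construction time (a sketch; the particular values are arbitrary):

        solver = IPM(x0=x0, x_dev=x_dev, f=f, lbfgs=10, Ktol=1.0E-6,
                     miter=50, verbosity=2)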
Notes:
-----------------
    1) For flexibility, symbolic expressions for f, df, d2f, ce, dce, d2ce, ci,
    dci, and d2ci may be replaced with functions. Keep in mind, however, that
replacing f, ce, or ci with functions will disable automatic
differentiation capabilities. This option is available for those who wish
to avoid redefining Theano functions that have already been compiled.
However, using symbolic expressions may lead to a quicker optimization.
2) When defining your own Jacobians, dce and dci, keep in mind that dce and
    dci are transposed relative to the output of theano.gradient.jacobian()
and should be size DxM or DxN, respectively.
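
    For instance, a hand-coded Jacobian consistent with this convention might
    be built as (a sketch):

        dce = theano.gradient.jacobian(ce, wrt=x_dev).T   # size DxM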
    3) Depending on the sophistication of the problem you wish to optimize, it
    may be better to write your own symbolic expression/function for the second
    derivative matrices d2f, d2ce, and/or d2ci. On some complicated and large
    problems, automatic differentiation of the second derivatives may be
    inefficient, leading to slow computations of the Hessian.
Class Functions:
compile(nvar=None, neq=None, nineq=None): validate input,
form expressions for the Lagrangian and its gradient and Hessian,
form expressions for weight and Lagrange multiplier initialization,
define device variables, and compile symbolic expressions.
[Args] nvar (optional), neq (optional), nineq (optional)
            * nvar (int, scalar) must be set to the number of weights, D, if x0
              is uninitialized
            * neq (int, scalar) must be set to the number of equality
              constraints, M, if x0 is uninitialized
            * nineq (int, scalar) must be set to the number of inequality
              constraints, N, if x0 is uninitialized
[NOTE] If the user runs compile on an instance of the
solver object and then intends to reuse the solver object after
changing the symbolic Theano expressions defined in the Class
Variables section, compile will need to be rerun to
compile any modified/new expressions.
    solve(x0=None, s0=None, lda0=None, force_recompile=False): run the
        interior-point method solver.
[Args] x0 (optional), s0 (optional), lda0 (optional),
force_recompile (optional)
* x0 (NumPy array, size D) can be used to initialize the weights if
they are not already initialized or to reinitialize the weights
* s0 (NumPy array, size N) gives the user control over initialization
of the slack variables, if desired (size N)
* lda0 (NumPy array, size M+N) gives the user control over
initialization of the Lagrange multipliers, if desired
* force_recompile (bool, scalar) the solve() class function
automatically calls the compile() class function on its first
execution. On subsequent executions, solve() will not automatically
recompile unless force_recompile is set to True.
[Returns] (x, s, lda, fval, kkt)
* x (NumPy array, size D) are the weights at the solution
* s (NumPy array, size N) are the slack variables at the solution
* lda (NumPy array, size M+N) are the Lagrange multipliers at the
solution
* fval (float, scalar) is f evaluated at x
* kkt (list) is a list of the first-order KKT conditions solved at x
(see class function KKT for details)
[NOTE] If the solver is used more than once, it will likely be
necessary to reinitialize mu and nu since they are left in their
final state after the solver is used.
KKT(x, s, lda): calculate the first-order KKT conditions.
[Args] x, s, lda
* x (NumPy array, size D) are the weights
* s (NumPy array, size N) are the slack variables
* lda (NumPy array, size M+N) are the Lagrange multipliers
[Returns] [kkt1, kkt2, kkt3, kkt4]
* kkt1 (NumPy array, size D) is the gradient of the Lagrangian barrier
problem at x, s, and lda
* kkt2 (NumPy array, size N) is the gradient of the dual barrier
problem at x, s, and lda; if there are no inequality constraints,
then kkt2=0
            * kkt3 (NumPy array, size M) is the equality constraint satisfaction
              (i.e. ce evaluated at x); if there are no equality constraints,
              then kkt3=0
            * kkt4 (NumPy array, size N) is the inequality constraint
              satisfaction (i.e. ci evaluated at x); if there are no inequality
              constraints, then kkt4=0
[NOTE] All arguments are required. If s and/or lda are irrelevant to
the user's problem, set those variables to a 0 dimensional NumPy
array.
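
    For example, the KKT residuals can be re-checked after a solve (a sketch
    using the return values documented above):

        x, s, lda, fval, kkt = problem.solve()
        kkt1, kkt2, kkt3, kkt4 = problem.KKT(x, s, lda)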
References:
[1] Nocedal J & Wright SJ, 'Numerical Optimization', 2nd Ed. Springer (2006).
[2] Byrd RH, Nocedal J, & Schnabel RB, 'Representations of quasi-Newton
matrices and their use in limited memory methods', Mathematical
programming, 63(1), 129-156 (1994).
[3] Wachter A & Biegler LT, 'On the implementation of an interior-point
filter line-search algorithm for large-scale nonlinear programming',
Mathematical programming, 106(1), 25-57 (2006).
TO DO: Translate line-search into Theano
"""
def __init__(self, x0=None, x_dev=None, f=None, df=None, d2f=None, ce=None, dce=None, d2ce=None, ci=None, dci=None,
d2ci=None, lda0=None, lambda_dev=None, s0=None, mu=0.2, nu=10.0, rho=0.1, tau=0.995, eta=1.0E-4,
beta=0.4, miter=20, niter=10, Xtol=None, Ktol=1.0E-4, Ftol=None, lbfgs=False, lbfgs_zeta=None,
float_dtype=np.float64, verbosity=1):
self.x0 = x0
self.x_dev = x_dev
self.lda0 = lda0
self.lambda_dev = lambda_dev
self.s0 = s0
self.f = f
self.df = df
self.d2f = d2f
self.ce = ce
self.dce = dce
self.d2ce = d2ce
self.ci = ci
self.dci = dci
self.d2ci = d2ci
self.nvar = None
self.neq = None
self.nineq = None
self.eps = np.finfo(float_dtype).eps
self.mu = mu
self.nu = nu
self.rho = rho
self.tau = tau
self.eta = eta
self.beta = beta
self.miter = miter
self.niter = niter
if Xtol:
self.Xtol = Xtol
else:
self.Xtol = self.eps
self.Ktol = Ktol
self.Ftol = Ftol
self.reg_coef = float_dtype(np.sqrt(self.eps))
self.lbfgs = lbfgs
if self.lbfgs and lbfgs_zeta is None:
self.lbfgs_zeta = float_dtype(1.0)
else:
self.lbfgs_zeta = lbfgs_zeta
self.lbfgs_fail_max = lbfgs
self.float_dtype = float_dtype
self.nu_dev = theano.shared(self.float_dtype(self.nu), name='nu_dev')
self.mu_dev = theano.shared(self.float_dtype(self.mu), name='mu_dev')
self.dz_dev = T.vector('dz_dev')
self.b_dev = T.matrix('b_dev')
self.M_dev = T.matrix('M_dev')
self.s_dev = T.vector('s_dev')
self.verbosity = verbosity
self.delta0 = self.reg_coef
        np.set_printoptions(precision=4)  # np.set_printoptions returns None, so its result is not stored
self.compiled = False
@staticmethod
def check_precompile(func):
"""Check if the Theano expression is actually a Theano function. If so,
return True, otherwise return False.
"""
return isinstance(func, FunctionType)
def validate(self):
"""Validate inputs
"""
assert self.x_dev is not None
assert self.f is not None
assert (self.ce is not None) or (
self.ce is None and self.dce is None and self.d2ce is None)
assert (self.ci is not None) or (
self.ci is None and self.dci is None and self.d2ci is None)
assert self.mu > 0.0
assert self.nu > 0.0
assert 0.0 < self.eta < 1.0
assert 0.0 < self.rho < 1.0
assert 0.0 < self.tau < 1.0
assert self.beta < 1.0
assert self.miter >= 0 and isinstance(self.miter, int)
        assert self.niter >= 0 and isinstance(self.niter, int)
assert self.Xtol >= self.eps
assert self.Ktol >= self.eps
assert self.Ftol is None or self.Ftol >= 0.0
assert self.lbfgs >= 0 or self.lbfgs == False
if self.lbfgs:
assert isinstance(self.lbfgs, int)
assert self.lbfgs_zeta is None or self.lbfgs_zeta > 0.0
def compile(self, nvar=None, neq=None, nineq=None):
"""Validate some of the input variables and compile the objective function,
the gradient, and the Hessian with constraints.
"""
# get number of variables and constraints
        if nvar is not None:
            self.nvar = nvar
        elif self.x0 is not None:
            # infer the number of weights from the initialization (see docstring)
            self.nvar = self.x0.size
if neq is not None:
self.neq = neq
else:
self.neq = None
if nineq is not None:
self.nineq = nineq
else:
self.nineq = None
# check if any functions are precompiled
f_precompile = self.check_precompile(self.f)
df_precompile = self.check_precompile(self.df)
d2f_precompile = self.check_precompile(self.d2f)
ce_precompile = self.check_precompile(self.ce)
dce_precompile = self.check_precompile(self.dce)
d2ce_precompile = self.check_precompile(self.d2ce)
ci_precompile = self.check_precompile(self.ci)
dci_precompile = self.check_precompile(self.dci)
d2ci_precompile = self.check_precompile(self.d2ci)
if any([f_precompile, df_precompile, d2f_precompile, ce_precompile, dce_precompile, d2ce_precompile,
ci_precompile, dci_precompile, d2ci_precompile]):
precompile = True
else:
precompile = False
# get the number of equality and inequality constraints if not provided by the user
if self.ce is not None and self.neq is None:
if ce_precompile:
CE = self.ce
else:
CE = theano.function(inputs=[self.x_dev], outputs=self.ce)
c = CE(self.x0)
self.neq = c.size
elif neq is None:
self.neq = 0
else:
self.neq = neq
if self.ci is not None and self.nineq is None:
if ci_precompile:
CI = self.ci
else:
CI = theano.function(inputs=[self.x_dev], outputs=self.ci)
c = CI(self.x0)
self.nineq = c.size
elif nineq is None:
self.nineq = 0
else:
self.nineq = nineq
# declare device variables
if self.lambda_dev is None:
            self.lambda_dev = T.vector('lambda_dev')
# use automatic differentiation if gradient and/or Hessian (if applicable) of f expressions are not provided
if self.df is None:
df = T.grad(self.f, self.x_dev)
else:
df = self.df
if not self.lbfgs and self.d2f is None:
d2f = theano.gradient.hessian(cost=self.f, wrt=self.x_dev)
else:
d2f = self.d2f
# construct expression for the constraint Jacobians and Hessians (if exact Hessian is used)
if self.neq:
if self.dce is None:
dce = theano.gradient.jacobian(
self.ce, wrt=self.x_dev).reshape((self.neq, self.nvar)).T
else:
dce = self.dce
if not self.lbfgs:
if self.d2ce is None:
d2ce = theano.gradient.hessian(cost=T.sum(
self.ce * self.lambda_dev[:self.neq]), wrt=self.x_dev)
else:
d2ce = self.d2ce
if self.nineq:
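            # Sigma = diag(lda_i / s_i) is the primal-dual approximation to the
            # Hessian of the barrier term with respect to the slacks s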
Sigma = diag(self.lambda_dev[self.neq:] / (self.s_dev + self.eps))
if self.dci is None:
dci = theano.gradient.jacobian(
self.ci, wrt=self.x_dev).reshape((self.nineq, self.nvar)).T
else:
dci = self.dci
if not self.lbfgs:
if self.d2ci is None:
d2ci = theano.gradient.hessian(cost=T.sum(
self.ci * self.lambda_dev[self.neq:]), wrt=self.x_dev)
else:
d2ci = self.d2ci
# if some expressions have been precompiled into functions, compile any remaining expressions
if precompile:
if not f_precompile:
f_func = theano.function(inputs=[self.x_dev], outputs=self.f)
else:
f_func = self.f
if not df_precompile:
df_func = theano.function(inputs=[self.x_dev], outputs=df)
else:
df_func = df
if not self.lbfgs:
if not d2f_precompile:
d2f_func = theano.function(
inputs=[self.x_dev], outputs=d2f)
else:
d2f_func = d2f
if self.neq:
if not ce_precompile:
ce_func = theano.function(
inputs=[self.x_dev], outputs=self.ce)
else:
ce_func = self.ce
if not dce_precompile:
dce_func = theano.function(
inputs=[self.x_dev], outputs=dce.reshape((self.nvar, self.neq)))
else:
def dce_func(x): return dce(
x).reshape((self.nvar, self.neq))
if not self.lbfgs:
if not d2ce_precompile:
d2ce_func = theano.function(
inputs=[self.x_dev, self.lambda_dev], outputs=d2ce)
else:
d2ce_func = d2ce
if self.nineq:
if not ci_precompile:
ci_func = theano.function(
inputs=[self.x_dev, self.s_dev], outputs=self.ci - self.s_dev)
else:
def ci_func(x, s): return self.ci(x) - s
if not dci_precompile:
dci_func = theano.function(
inputs=[self.x_dev], outputs=dci.reshape((self.nvar, self.nineq)))
else:
def dci_func(x): return dci(
x).reshape((self.nvar, self.nineq))
if not self.lbfgs:
if not d2ci_precompile:
d2ci_func = theano.function(
inputs=[self.x_dev, self.lambda_dev], outputs=d2ci)
else:
d2ci_func = d2ci
# construct composite expression for the constraints
if self.neq or self.nineq:
if precompile:
if self.neq and self.nineq:
def con(x, s): return np.concatenate([ce_func(x).reshape((self.neq,)),
ci_func(x, s).reshape((self.nineq,))], axis=0)
elif self.neq:
def con(x, s): return ce_func(x).reshape((self.neq,))
else:
def con(x, s): return ci_func(x, s).reshape((self.nineq,))
else:
con = T.zeros((self.neq + self.nineq,))
if self.neq:
con = T.set_subtensor(con[:self.neq], self.ce)
if self.nineq:
con = T.set_subtensor(con[self.neq:], self.ci - self.s_dev)
# construct composite expression for the constraints Jacobian
if self.neq or self.nineq:
if precompile:
if self.neq and self.nineq:
def jaco_top(x): return np.concatenate([dce_func(x).reshape((self.nvar, self.neq)),
dci_func(x).reshape((self.nvar, self.nineq))], axis=1)
jaco_bottom = np.concatenate(
[np.zeros((self.nineq, self.neq)), -np.eye(self.nineq)], axis=1)
def jaco(x): return np.concatenate(
[jaco_top(x), jaco_bottom], axis=0)
elif self.neq:
def jaco(x): return dce_func(
x).reshape((self.nvar, self.neq))
else:
def jaco(x): return np.concatenate([
dci_func(x).reshape((self.nvar, self.nineq)),
-np.eye(self.nineq)
], axis=0)
else:
jaco = T.zeros((self.nvar + self.nineq, self.neq + self.nineq))
if self.neq:
jaco = T.set_subtensor(jaco[:self.nvar, :self.neq], dce)
if self.nineq:
jaco = T.set_subtensor(jaco[:self.nvar, self.neq:], dci)
jaco = T.set_subtensor(
jaco[self.nvar:, self.neq:], -T.eye(self.nineq))
# construct expression for the gradient
if precompile:
if self.neq and self.nineq:
def grad_x(x, lda): return (df_func(x) - np.dot(dce_func(x), lda[:self.neq]) -
np.dot(dci_func(x), lda[self.neq:]))
elif self.neq:
def grad_x(x, lda): return df_func(
x) - np.dot(dce_func(x), lda)
elif self.nineq:
def grad_x(x, lda): return df_func(
x) - np.dot(dci_func(x), lda)
else:
def grad_x(x, lda): return df_func(x)
if self.nineq:
grad_s = self.lambda_dev[self.neq:] - \
self.mu_dev / (self.s_dev + self.eps)
grad_s = theano.function(inputs=[self.x_dev, self.s_dev, self.lambda_dev], outputs=grad_s,
on_unused_input='ignore')
if self.neq:
if ce_precompile:
def grad_lda_eq(x): return ce_func(x).ravel()
else:
grad_lda_eq = theano.function(inputs=[self.x_dev], outputs=self.ce.ravel(),
on_unused_input='ignore')
if self.nineq:
if ci_precompile:
def grad_lda_ineq(x, s): return ci_func(x, s).ravel()
else:
grad_lda_ineq = theano.function(inputs=[self.x_dev, self.s_dev],
outputs=(self.ci - self.s_dev).ravel(), on_unused_input='ignore')
if self.neq and self.nineq:
def grad(x, s, lda): return np.concatenate([grad_x(x, lda), grad_s(x, s, lda), grad_lda_eq(x),
grad_lda_ineq(x, s)], axis=0)
elif self.neq:
def grad(x, s, lda): return np.concatenate(
[grad_x(x, lda), grad_lda_eq(x)], axis=0)
elif self.nineq:
def grad(x, s, lda): return np.concatenate([grad_x(x, lda), grad_s(x, s, lda), grad_lda_ineq(x, s)],
axis=0)
else:
def grad(x, s, lda): return grad_x(x, lda)
else:
grad = T.zeros((self.nvar + 2 * self.nineq + self.neq,))
grad = T.set_subtensor(grad[:self.nvar], df)
if self.neq:
grad = T.inc_subtensor(
grad[:self.nvar], -T.dot(dce, self.lambda_dev[:self.neq]))
grad = T.set_subtensor(
grad[self.nvar + self.nineq:self.nvar + self.nineq + self.neq], self.ce)
if self.nineq:
grad = T.inc_subtensor(
grad[:self.nvar], -T.dot(dci, self.lambda_dev[self.neq:]))
grad = T.set_subtensor(grad[self.nvar:self.nvar + self.nineq], self.lambda_dev[self.neq:] -
self.mu_dev / (self.s_dev + self.eps))
grad = T.set_subtensor(
grad[self.nvar + self.nineq + self.neq:], self.ci - self.s_dev)
# construct expressions for the merit function
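        # phi(x, s) = f + nu * (||ce||_1 + ||ci - s||_1) - mu * sum(log(s)):
        # an l1-penalty merit function (cf. Ch. 19 of [1])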
if precompile:
if self.nineq:
bar_func = self.mu_dev * T.sum(T.log(self.s_dev))
bar_func = theano.function(
inputs=[self.s_dev], outputs=bar_func, on_unused_input='ignore')
if self.neq and self.nineq:
def phi(x, s): return (f_func(x) + self.nu_dev.get_value() *
(np.sum(np.abs(ce_func(x))) + np.sum(np.abs(ci_func(x, s)))) - bar_func(s))
elif self.neq:
def phi(x, s): return f_func(x) + \
self.nu_dev.get_value() * np.sum(np.abs(ce_func(x)))
elif self.nineq:
def phi(x, s): return f_func(x) + self.nu_dev.get_value() * \
np.sum(np.abs(ci_func(x, s))) - bar_func(s)
else:
def phi(x, s): return f_func(x)
else:
phi = self.f
if self.neq:
phi += self.nu_dev * T.sum(T.abs_(self.ce))
if self.nineq:
phi += self.nu_dev * \
T.sum(T.abs_(self.ci - self.s_dev)) - \
self.mu_dev * T.sum(T.log(self.s_dev))
# construct expressions for the merit function gradient
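        # dphi is the directional derivative of the merit function along the
        # step dz; the penalty terms enter through their linearized decrease
        # -nu * ||c||_1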
if precompile:
if self.nineq:
dbar_func = T.dot(
self.mu_dev / (self.s_dev + self.eps), self.dz_dev[self.nvar:])
dbar_func = theano.function(inputs=[self.s_dev, self.dz_dev], outputs=dbar_func,
on_unused_input='ignore')
if self.neq and self.nineq:
def dphi(x, s, dz): return (np.dot(df_func(x), dz[:self.nvar]) - self.nu_dev.get_value() *
(np.sum(np.abs(ce_func(x))) + np.sum(np.abs(ci_func(x, s)))) -
dbar_func(s, dz))
elif self.neq:
def dphi(x, s, dz): return (np.dot(df_func(x), dz[:self.nvar]) -
self.nu_dev.get_value() * np.sum(np.abs(ce_func(x))))
elif self.nineq:
def dphi(x, s, dz): return (np.dot(df_func(x), dz[:self.nvar]) - self.nu_dev.get_value() *
np.sum(np.abs(ci_func(x, s))) - dbar_func(s, dz))
else:
def dphi(x, s, dz): return np.dot(df_func(x), dz[:self.nvar])
else:
dphi = T.dot(df, self.dz_dev[:self.nvar])
if self.neq:
dphi -= self.nu_dev * T.sum(T.abs_(self.ce))
if self.nineq:
dphi -= (self.nu_dev * T.sum(T.abs_(self.ci - self.s_dev)) +
T.dot(self.mu_dev / (self.s_dev + self.eps), self.dz_dev[self.nvar:]))
# construct expression for initializing the Lagrange multipliers
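        # least-squares initialization: lda0 solves df(x0) ~ jaco(x0).dot(lda)
        # via the Moore-Penrose pseudoinverse (see the lda0 docstring entry)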
if self.neq or self.nineq:
if precompile:
def init_lambda(x): return np.dot(np.linalg.pinv(jaco(x)[:self.nvar, :]),
df_func(x).reshape((self.nvar, 1))).reshape((self.neq + self.nineq,))
else:
init_lambda = T.dot(pinv(jaco[:self.nvar, :]),
df.reshape((self.nvar, 1))).reshape((self.neq + self.nineq,))
# construct expression for initializing the slack variables
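        # elementwise s0 = max(ci(x0), Ktol), which keeps the slacks strictly
        # positive so that log(s) is well defined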
if self.nineq:
if precompile:
def init_slack(x): return np.max(np.concatenate([
ci_func(x, np.zeros((self.nineq,))
).reshape((self.nineq, 1)),
self.Ktol * np.ones((self.nineq, 1))
], axis=1), axis=1)
else:
init_slack = T.max(T.concatenate([
self.ci.reshape((self.nineq, 1)),
self.Ktol * T.ones((self.nineq, 1))
], axis=1), axis=1)
# construct expression for gradient of f( + the barrier function)
if precompile:
if self.nineq:
dbar_func2 = -self.mu_dev / (self.s_dev + self.eps)
dbar_func2 = theano.function(
inputs=[self.s_dev], outputs=dbar_func2, on_unused_input='ignore')
def barrier_cost_grad(x, s): return np.concatenate(
[df_func(x), dbar_func2(s)], axis=0)
else:
def barrier_cost_grad(x, s): return df_func(x)
else:
barrier_cost_grad = T.zeros((self.nvar + self.nineq,))
barrier_cost_grad = T.set_subtensor(
barrier_cost_grad[:self.nvar], df)
if self.nineq:
barrier_cost_grad = T.set_subtensor(barrier_cost_grad[self.nvar:],
-self.mu_dev / (self.s_dev + self.eps))
# construct expression for the Hessian of the Lagrangian (assumes Lagrange multipliers included in
# d2ce/d2ci expressions), if applicable
if not self.lbfgs:
if precompile:
# construct expression for the Hessian of the Lagrangian
if self.neq and self.nineq:
def d2L(x, lda): return d2f_func(x) - \
d2ce_func(x, lda) - d2ci_func(x, lda)
elif self.neq:
def d2L(x, lda): return d2f_func(x) - d2ce_func(x, lda)
elif self.nineq:
def d2L(x, lda): return d2f_func(x) - d2ci_func(x, lda)
else:
def d2L(x, lda): return d2f_func(x)
# construct expression for the symmetric Hessian matrix
if self.neq or self.nineq:
if self.nineq:
def hess_upper_left(x, s, lda): return np.concatenate([
np.concatenate([
np.triu(d2L(x, lda)),
np.zeros((self.nvar, self.nineq))
], axis=1),
np.concatenate([
np.zeros((self.nineq, self.nvar)),
np.diag(lda[self.neq:] / (s + self.eps))
], axis=1)
], axis=0)
else:
def hess_upper_left(
x, s, lda): return np.triu(d2L(x, lda))
def hess_upper_right(x): return jaco(x)
def hess_upper(x, s, lda): return np.concatenate([
hess_upper_left(x, s, lda),
hess_upper_right(x)
], axis=1)
def hess_triu(x, s, lda): return np.concatenate([
hess_upper(x, s, lda),
np.zeros((self.neq + self.nineq, self.nvar +
2 * self.nineq + self.neq))
], axis=0)
def hess(x, s, lda): return hess_triu(
x, s, lda) + np.triu(hess_triu(x, s, lda), k=1).T
else:
def hess_triu(x, s, lda): return np.triu(d2L(x, lda))
def hess(x, s, lda): return hess_triu(
x, s, lda) + np.triu(hess_triu(x, s, lda), k=1).T
else:
# construct expression for the Hessian of the Lagrangian
d2L = d2f
if self.neq:
d2L -= d2ce
if self.nineq:
d2L -= d2ci
# construct expression for the symmetric Hessian matrix
hess = T.zeros((self.nvar + 2 * self.nineq +
self.neq, self.nvar + 2 * self.nineq + self.neq))
hess = T.set_subtensor(
hess[:self.nvar, :self.nvar], T.triu(d2L))
if self.neq:
hess = T.set_subtensor(
hess[:self.nvar, (self.nvar + self.nineq):(self.nvar + self.nineq + self.neq)],
dce
)
if self.nineq:
hess = T.set_subtensor(
hess[:self.nvar, (self.nvar + self.nineq + self.neq):], dci)
hess = T.set_subtensor(hess[self.nvar:(self.nvar + self.nineq), self.nvar:(self.nvar + self.nineq)],
Sigma) # T.triu(Sigma))
hess = T.set_subtensor(
hess[self.nvar:(self.nvar + self.nineq),
(self.nvar + self.nineq + self.neq):],
-T.eye(self.nineq)
)
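                # symmetrize the upper-triangular assembly; triu + triu.T
                # double-counts the diagonal, so half of it is removed below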
hess = T.triu(hess) + T.triu(hess).T
hess = hess - T.diag(T.diagonal(hess) / 2.0)
# construct expression for symmetric linear system solve
lin_soln = sym_solve(self.M_dev, self.b_dev)
# if using L-BFGS, get the expression for the descent direction
if self.lbfgs:
dz, dz_sqr = self.lbfgs_builder()
# compile expressions into device functions
if precompile:
self.cost = f_func
else:
self.cost = theano.function(inputs=[self.x_dev], outputs=self.f)
if precompile:
self.barrier_cost_grad = barrier_cost_grad
else:
self.barrier_cost_grad = theano.function(inputs=[self.x_dev, self.s_dev],
outputs=barrier_cost_grad, on_unused_input='ignore')
if precompile:
self.grad = grad
else:
self.grad = theano.function(inputs=[self.x_dev, self.s_dev, self.lambda_dev],
outputs=grad, on_unused_input='ignore')
if self.lbfgs:
self.lbfgs_dir_func = theano.function(inputs=[self.x_dev, self.s_dev, self.lambda_dev, self.g_dev,
self.zeta_dev, self.S_dev, self.Y_dev, self.SS_dev,
self.L_dev, self.D_dev, self.B_dev],
outputs=dz, on_unused_input='ignore')
if dz_sqr is not None:
self.lbfgs_dir_func_sqr = theano.function(inputs=[self.x_dev, self.s_dev, self.lambda_dev, self.g_dev,
                                                              self.zeta_dev, self.S_dev, self.Y_dev, self.SS_dev,
self.L_dev, self.D_dev, self.B_dev],
outputs=dz_sqr, on_unused_input='ignore')
else:
if precompile:
self.hess = hess
else:
self.hess = theano.function(
inputs=[self.x_dev, self.s_dev, self.lambda_dev],
outputs=hess, on_unused_input='ignore'
)
if precompile:
self.phi = phi
else:
self.phi = theano.function(
inputs=[self.x_dev, self.s_dev],
outputs=phi, on_unused_input='ignore'
)
if precompile:
self.dphi = dphi
else:
self.dphi = theano.function(
inputs=[self.x_dev, self.s_dev, self.dz_dev],
outputs=dphi, on_unused_input='ignore'
)
self.eigh = theano.function(
inputs=[self.M_dev],
outputs=eigvalsh(self.M_dev, T.eye(self.M_dev.shape[0])),
)
self.sym_solve_cmp = theano.function(
inputs=[self.M_dev, self.b_dev],
outputs=lin_soln,
)
# self.gen_solve = theano.function(
# inputs=[self.M_dev, self.b_dev],
# outputs=gen_solve,
# )
if self.neq or self.nineq:
if precompile:
self.con = con
else:
self.con = theano.function(
inputs=[self.x_dev, self.s_dev],
outputs=con, on_unused_input='ignore'
)
if precompile:
self.jaco = jaco
else:
self.jaco = theano.function(
inputs=[self.x_dev],
outputs=jaco,
on_unused_input='ignore',
)
if precompile:
self.init_lambda = init_lambda
else:
self.init_lambda = theano.function(
inputs=[self.x_dev],
outputs=init_lambda,
)
if self.nineq:
if precompile:
self.init_slack = init_slack
else:
self.init_slack = theano.function(
inputs=[self.x_dev],
outputs=init_slack,
)
self.compiled = True
def KKT(self, x, s, lda):
"""Calculate the first-order Karush-Kuhn-Tucker conditions. Irrelevant
conditions are set to zero.
"""
# kkt1 is the gradient of the Lagrangian with respect to x (weights)
# kkt2 is the gradient of the Lagrangian with respect to s (slack variables)
# kkt3 is the gradient of the Lagrangian with respect to lda[:self.neq] (equality constraints Lagrange
# multipliers)
# kkt4 is the gradient of the Lagrangian with respect to lda[self.neq:] (inequality constraints Lagrange
# multipliers)
kkts = self.grad(x, s, lda)
if self.neq and self.nineq:
kkt1 = kkts[:self.nvar]
kkt2 = kkts[self.nvar:(self.nvar + self.nineq)] * s
kkt3 = kkts[(self.nvar + self.nineq):(self.nvar + self.nineq + self.neq)]
kkt4 = kkts[(self.nvar + self.nineq + self.neq):]
elif self.neq:
kkt1 = kkts[:self.nvar]
kkt2 = self.float_dtype(0.0)
kkt3 = kkts[(self.nvar + self.nineq):(self.nvar + self.nineq + self.neq)]
kkt4 = self.float_dtype(0.0)
elif self.nineq:
kkt1 = kkts[:self.nvar]
kkt2 = kkts[self.nvar:(self.nvar + self.nineq)] * s
kkt3 = self.float_dtype(0.0)
kkt4 = kkts[(self.nvar + self.nineq + self.neq):]
else:
kkt1 = kkts[:self.nvar]
kkt2 = self.float_dtype(0.0)
kkt3 = self.float_dtype(0.0)
kkt4 = self.float_dtype(0.0)
return kkt1, kkt2, kkt3, kkt4
def lbfgs_init(self):
"""Initialize storage arrays for L-BFGS algorithm.
"""
# initialize diagonal constant and storage arrays
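        # S and Y store the variable and gradient differences used by the
        # compact limited-memory representation of [2]; SS accumulates the
        # inner products S.T.dot(S)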
zeta = self.float_dtype(self.lbfgs_zeta)
S = np.array([], dtype=self.float_dtype).reshape((self.nvar, 0))
Y = np.array([], dtype=self.float_dtype).reshape((self.nvar, 0))
SS = np.array([], dtype=self.float_dtype).reshape((0, 0))