-
Notifications
You must be signed in to change notification settings - Fork 3
/
Ap.cpp
6993 lines (6571 loc) · 281 KB
/
Ap.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// ALGLIB++
// Based on ALGLIB: Copyright (c) Sergey Bochkanov (ALGLIB project).
// Revisions Copyright (c) Lydia Marie Williamson, Mark Hopkins Consulting
// Source License:
// This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation (www.fsf.org);
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// A copy of the GNU General Public License is available at http://www.fsf.org/licensing/licenses
#define InAlgLib
// if AE_OS == AE_LINUX (will be redefined to AE_POSIX in Ap.h),
// set _GNU_SOURCE flag BEFORE any #includes to get affinity management functions.
#if AE_OS == AE_LINUX && !defined _GNU_SOURCE
# define _GNU_SOURCE
#endif
// Must be defined before we include Ap.h.
#define _ALGLIB_IMPL_DEFINES
#define _ALGLIB_INTEGRITY_CHECKS_ONCE
#include "Ap.h"
#include <limits>
#include <locale.h>
#include <ctype.h>
#if defined AE_CPU && AE_CPU == AE_INTEL
# if AE_COMPILER == AE_GNUC && 0
# include <fpu_control.h> //(@) For _FPU_GETCW and _FPU_SETCW, for a fix that's apparently no longer needed.
# elif AE_COMPILER == AE_MSVC
# include <intrin.h>
# endif
#endif
// OS-specific includes.
#if AE_OS == AE_POSIX || defined AE_DEBUG4POSIX
# include <time.h>
# include <unistd.h>
# include <pthread.h>
# include <sched.h>
# include <sys/time.h> // For tickcount().
#elif AE_OS == AE_WINDOWS || defined AE_DEBUG4WINDOWS
# ifndef _WIN32_WINNT
# define _WIN32_WINNT 0x0501
# endif
# include <windows.h>
# include <process.h>
#endif
// Entropy source
#if ALGLIB_ENTROPY_SRC == ALGLIB_ENTROPY_SRC_OPENSSL
# include <openssl/rand.h>
# define ALGLIB_OPENSSL_RAND_MAX 0x7fffffff
#endif
// Debugging helpers for Windows.
#ifdef AE_DEBUG4WINDOWS
# include <windows.h>
#endif
namespace alglib_impl {
// Core Code (Vectors, Matrices, Memory Management, etc.)
// Local definitions.
#define AE_DATA_ALIGN 0x40
#define AE_PTR_ALIGN sizeof(void *)
#define AE_LITTLE_ENDIAN 1
#define AE_BIG_ENDIAN 2
#define AE_MIXED_ENDIAN 3
#define AE_SER_ENTRY_LENGTH 11
#define AE_SER_ENTRIES_PER_ROW 5
#if defined ALGLIB_REDZONE
# define _ALGLIB_REDZONE_VAL 0x3c
#endif
// These declarations are used to ensure, at compile-time, that
// sizeof(bool) == 1, sizeof(ae_int32_t) == 4, sizeof(ae_int64_t) == 8, sizeof(ae_uint64_t) == 8, sizeof(ae_int_t) == sizeof(void *).
// They are implemented by the following general method to verify the condition Cond:
// static char DummyArray[Cond ? +1 : -1];
// that would lead to a syntax error if the condition failed (by declaring a negative array size).
// You can remove them, if you want, since they are not used anywhere else.
#define ForceCond(Arr, Cond) static char Arr[(Cond) ? +1 : -1]
ForceCond(_ae_bool_must_be_8_bits_wide, (int)sizeof(bool) == 1);
ForceCond(_ae_int32_t_must_be_32_bits_wide, (int)sizeof(ae_int32_t) == 4);
ForceCond(_ae_int64_t_must_be_64_bits_wide, (int)sizeof(ae_int64_t) == 8);
ForceCond(_ae_uint64_t_must_be_64_bits_wide, (int)sizeof(ae_uint64_t) == 8);
ForceCond(_ae_int_t_must_be_pointer_sized, (int)sizeof(ae_int_t) == (int)sizeof(void *));
#if defined ALGLIB_REDZONE
ForceCond(_ae_redzone_must_be_multiple_of_64, (ALGLIB_REDZONE) >= (AE_DATA_ALIGN) && (ALGLIB_REDZONE) % (AE_DATA_ALIGN) == 0);
#endif
#undef ForceCond
// Allocation tracking, for debugging.
ae_int_t _alloc_counter = 0;
ae_int_t _alloc_counter_total = 0;
bool _use_alloc_counter = false;
// Allocation debugging.
bool _force_malloc_failure = false;
ae_int_t _malloc_failure_after = 0;
//(@) Originally a part of the global environment structure, these should all be made thread-local.
// A pointer to the jmp_buf for cases when C-style exception handling is used.
// It may be NULL.
AutoS jmp_buf *volatile CurBreakAt;
// Set the jump buffer for error handling.
// buf: the caller's jmp_buf which ae_break() will longjmp() to on error, or NULL to fall back to abort().
void ae_state_set_break_jump(jmp_buf *buf) { CurBreakAt = buf; }
// The ae_error_type of the last error and a legible message for it; filled when an exception is thrown.
AutoS ae_error_type volatile CurStatus;
AutoS const char *volatile CurMsg;
// Flags: call-local settings for ALGLIB++.
AutoS ae_uint64_t CurFlags;
// Set CurFlags.
// flags: the new call-local settings (threading model, etc.) stored into CurFlags.
void ae_state_set_flags(ae_uint64_t flags) { CurFlags = flags; }
// A pointer to the top block in a stack of frames which hold dynamically allocated objects.
AutoS ae_frame *volatile TopFr;
#if 0 //(@) Not used, but retained for possible inclusion into the multi-threading core being created anew for ALGLIB++.
// Threading information.
// NOTES:
// * These are remnants from the Commercial Version, which worked with the (Commercial-only) file smp.h.
// * They were declared as generic pointers of type (void *) in order to avoid explicit dependency on smp.h.
// * The current thread pool.
// AutoS void *CurPool = NULL; //(@) Was never included in the Free Version.
// * The current worker thread.
AutoS void *CurThread = NULL;
// * The parent task: the one we are solving right now.
AutoS void *CurTask = NULL;
// * The thread exception handler: the function which must be called by ae_assert() before raising an exception.
AutoS void (*ErrorOp)(void *) = NULL;
#endif
// The stack and frame boundary special blocks.
static unsigned char DynBottom = 1, DynFrame = 2;
// Make a new stack frame for the environment.
// Fr points to the place in the dynamic block that marks where the frame begins.
// The dynamic block is assumed to be initialized by the caller and must be left alone
// (no changes, deallocations, or reuse) until ae_leave_frame() is called.
// It may be a global or (preferrably) a local variable.
// Push Fr onto the frame stack, tagging it with the &DynFrame sentinel so ae_frame_leave() can find the boundary.
void ae_frame_make(ae_frame *Fr) { Fr->p_next = TopFr, Fr->deallocator = NULL, Fr->ptr = &DynFrame, TopFr = Fr; }
// Leave the current stack frame and deallocate all automatic dynamic blocks which were attached to this frame.
void ae_frame_leave() {
// Free every automatic block above the nearest frame marker (or the stack-bottom marker).
   for (; TopFr->ptr != &DynFrame && TopFr->ptr != &DynBottom; TopFr = TopFr->p_next)
      if (TopFr->ptr != NULL && TopFr->deallocator != NULL) TopFr->deallocator(TopFr->ptr);
// Pop the frame marker itself; the stack-bottom marker is never popped.
   if (TopFr->ptr == &DynFrame) TopFr = TopFr->p_next;
}
// Initialize the ALGLIB++ frame stack environment.
// NOTE:
// * Stacks contain no frames, so ae_make_frame() must be called before attaching dynamic blocks.
// Without it ae_leave_frame() will cycle forever -- as intended.
void ae_state_init() {
// The base of the current stack of frames.
// p_next points to itself because a correct program should be able to detect the end of the list by looking at the ptr field.
// p_next == NULL may be used to distinguish automatic blocks (in the list) from non-automatic (not in the list).
   static ae_frame BotFr = { &BotFr, NULL, &DynBottom };
// Set the status indicators and clear the frame.
// NOTE(review): CurStatus is deliberately not reset here; only the break target, message, flags and frame stack are.
   CurBreakAt = NULL, CurMsg = "", CurFlags = NonTH, TopFr = &BotFr;
#if 0 //(@) Not used, but retained for possible inclusion into the multi-threading core being created anew for ALGLIB++.
// Set the threading information.
   CurThread = NULL, CurTask = NULL, ErrorOp = NULL;
#endif
}
// Clear the ALGLIB++ frame stack environment, freeing all the dynamic data in it that it controls.
void ae_state_clear() {
// Nothing to do if the environment was never initialized (or was already cleared).
   if (TopFr == NULL) return;
// Free every automatic block in every frame, down to (but not including) the stack-bottom marker.
   for (; TopFr->ptr != &DynBottom; TopFr = TopFr->p_next)
      if (TopFr->ptr != NULL && TopFr->deallocator != NULL) TopFr->deallocator(TopFr->ptr);
   TopFr = NULL;
}
// Clean up automatically managed memory before the caller terminates ALGLIB++.
// For TopFr != NULL call ErrorOp(), if defined.
// For TopFr == NULL do nothing.
void ae_clean_up() {
// Currently a no-op: the threading error hook below is compiled out.
#if 0 //(@) Not used, but retained for possible inclusion into the multi-threading core being created anew for ALGLIB++.
   if (TopFr != NULL && ErrorOp != NULL) ErrorOp(TopFr);
#endif
}
// Abnormally abort the program, using one of several ways:
// * if TopFr != NULL and CurBreakAt points to a jmp_buf - longjmp() to the return site,
// * else abort().
// In all cases, for TopFr != NULL, set CurStatus and CurMsg.
// Clear the frame stack, if any, with ae_state_clear().
#if 0 //(@) Not used, but retained for possible inclusion into the multi-threading core being created anew for ALGLIB++.
// If TopFr != NULL and ErrorOp() != NULL, call ErrorOp() before handling errors and clearing TopFr.
#endif
static void ae_break(ae_error_type error_type, const char *msg) {
// No environment: nothing to unwind, terminate immediately.
   if (TopFr == NULL) abort();
#if 0 //(@) Not used, but retained for possible inclusion into the multi-threading core being created anew for ALGLIB++.
   if (ErrorOp != NULL) ErrorOp(TopFr);
#endif
// Unwind and free all frames, then record the error for the handler at the longjmp() target.
   ae_state_clear();
   CurStatus = error_type, CurMsg = msg;
   if (CurBreakAt != NULL) longjmp(*CurBreakAt, 1); else abort();
}
// Assertion.
// Upon failure with TopFr != NULL, gracefully exit ALGLIB++, removing all frames and deallocating registered dynamic data structures.
// Otherwise, just abort().
// IMPORTANT:
// * This function ALWAYS evaluates cond, and cannot be replaced by macros which do nothing.
// In particular, when invoked, a function call may be used as the cond argument, and it will be carried out.
void ae_assert(bool cond, const char *msg) {
// cond is ALWAYS evaluated (see the note above); on failure raise ERR_ASSERTION_FAILED via ae_break().
   if (!cond) ae_break(ERR_ASSERTION_FAILED, msg);
}
#define AE_CRITICAL_ASSERT(x) if (!(x)) abort()
// Make flags variables into one or more char-sized variables in order to avoid problems with non-atomic reads/writes
// (single-byte ops are atomic on all contemporary CPUs).
#define _ALGLIB_FLG_THREADING_MASK 0x7
#define _ALGLIB_FLG_THREADING_SHIFT 0
static unsigned char _alglib_global_threading_flags = SerTH >> _ALGLIB_FLG_THREADING_SHIFT;
// Get/Set the default (global) threading model:
// * serial execution,
// * multithreading, if cores_to_use allows it.
ae_uint64_t ae_get_global_threading() {
// Widen the byte-sized flag back into the ae_uint64_t flags format.
   return (ae_uint64_t)_alglib_global_threading_flags << _ALGLIB_FLG_THREADING_SHIFT;
}
void ae_set_global_threading(ae_uint64_t flg_value) {
// Keep only the threading bits; anything other than serial/parallel is a caller error.
   flg_value &= _ALGLIB_FLG_THREADING_MASK;
   AE_CRITICAL_ASSERT(flg_value == SerTH || flg_value == ParTH);
// Stored in a single byte so that reads/writes are atomic on contemporary CPUs (see the note above).
   _alglib_global_threading_flags = (unsigned char)(flg_value >> _ALGLIB_FLG_THREADING_SHIFT);
}
// The recommended number of active workers:
// * The exact number of cores to use, if AE_NWORKERS > 0,
// * ALL available cores, if AE_NWORKERS == 0,
// * max(_alglib_cores_to_use + AE_NWORKERS, 1), if AE_NWORKERS < 0,
// * Default value == 0: either full parallelism if AE_NWORKERS is not defined,
// or a manually-set number of cores if AE_NWORKERS is defined.
// PROTECTION:
// * not needed; runtime modification is possible, but we do not need exact synchronization.
#if defined AE_NWORKERS && AE_NWORKERS <= 0
# error AE_NWORKERS must be positive number or not defined at all.
#endif
static ae_int_t _alglib_cores_to_use = 0;
// CPUID
// Information about the features the CPU and compiler support.
// You must tell ALGLIB++ what CPU family is used by defining AE_CPU (without this hint zero will be returned).
// NOTE:
// * The results of this function depend on both the CPU and compiler;
// if the compiler doesn't support SSE intrinsics, then the function won't set the corresponding flag.
// Query the CPU for SSE2/AVX2/FMA support, caching the answer after the first call.
static ae_cpuid_t ae_cpuid() {
// Determine the CPU characteristics and perform CPU-specific initializations.
// Previous calls are cached to speed things up.
// There is no synchronization, but this can be safely done on a per-thread basis,
// provided that simultaneous writes by different cores to the same location will be executed in serial manner,
// which is true of contemporary architectures.
   static volatile bool _ae_cpuid_initialized = false, _ae_cpuid_has_sse2 = false, _ae_cpuid_has_avx2 = false, _ae_cpuid_has_fma = false;
// If not initialized, then determine the system properties.
   if (!_ae_cpuid_initialized) {
#if defined AE_CPU && AE_CPU == AE_INTEL
      { // SSE2
# if AE_COMPILER == AE_GNUC || AE_COMPILER == AE_SUNC
         ae_int_t a, b, c, d;
# elif AE_COMPILER == AE_MSVC
         int CPUInfo[4];
# endif
      // SSE2 support.
      // CPUID leaf 1: EDX bit 26 indicates SSE2.
# if defined _ALGLIB_HAS_SSE2_INTRINSICS || AE_COMPILER == AE_SUNC
#  if AE_COMPILER == AE_GNUC || AE_COMPILER == AE_SUNC
         __asm__ __volatile__("cpuid":"=a"(a), "=b"(b), "=c"(c), "=d"(d):"a"(1));
         if (d & 0x04000000) _ae_cpuid_has_sse2 = true;
#  elif AE_COMPILER == AE_MSVC
         __cpuid(CPUInfo, 1);
         if (CPUInfo[3] & 0x04000000) _ae_cpuid_has_sse2 = true;
#  endif
# endif
# if defined _ALGLIB_HAS_AVX2_INTRINSICS
      // Check OS support for XSAVE XGETBV.
      // CPUID leaf 1: ECX bit 27 == OSXSAVE; XCR0 bits 1-2 set == OS saves XMM+YMM state on context switch.
#  if AE_COMPILER == AE_GNUC
         __asm__ __volatile__("cpuid":"=a"(a), "=b"(b), "=c"(c), "=d"(d):"a"(1));
         if (c & (0x1 << 27)) {
            __asm__ volatile ("xgetbv":"=a" (a), "=d"(d):"c"(0));
            if ((a & 0x6) == 0x6) {
               if (_ae_cpuid_has_sse2) { // AVX2 support.
               // CPUID leaf 7 sub-leaf 0: EBX bit 5 indicates AVX2.
                  __asm__ __volatile__("cpuid":"=a"(a), "=b"(b), "=c"(c), "=d"(d):"a"(7), "c"(0));
                  if (b & (0x1 << 5)) _ae_cpuid_has_avx2 = true;
               }
#   if defined _ALGLIB_HAS_FMA_INTRINSICS
               if (_ae_cpuid_has_avx2) { // FMA support.
               // CPUID leaf 1: ECX bit 12 indicates FMA3.
                  __asm__ __volatile__("cpuid":"=a"(a), "=b"(b), "=c"(c), "=d"(d):"a"(1));
                  if (c & (0x1 << 12)) _ae_cpuid_has_fma = true;
               }
#   endif
            }
         }
#  elif AE_COMPILER == AE_MSVC && _MSC_VER >= 1600
         __cpuid(CPUInfo, 1);
         if ((CPUInfo[2] & (0x1 << 27)) && (_xgetbv(0) & 0x6) == 0x6) {
            if (_ae_cpuid_has_sse2) { // AVX2 support.
               __cpuidex(CPUInfo, 7, 0);
               if (CPUInfo[1] & (0x1 << 5)) _ae_cpuid_has_avx2 = true;
            }
#   if defined _ALGLIB_HAS_FMA_INTRINSICS
            if (_ae_cpuid_has_avx2) { // FMA support.
               __cpuid(CPUInfo, 1);
               if (CPUInfo[2] & (0x1 << 12)) _ae_cpuid_has_fma = true;
            }
#   endif
         }
#  endif
# endif
      } { // Perform one more CPUID call to generate memory fence
# if AE_COMPILER == AE_GNUC || AE_COMPILER == AE_SUNC
         ae_int_t a, b, c, d;
         __asm__ __volatile__("cpuid":"=a"(a), "=b"(b), "=c"(c), "=d"(d):"a"(1));
# elif AE_COMPILER == AE_MSVC
         int CPUInfo[4];
         __cpuid(CPUInfo, 1);
# endif
      } { // Perform other CPU-related initializations.
# if AE_COMPILER == AE_GNUC && 0
      // (@) Legacy code required for earlier versions of GCC, no longer needed here.
      // (@) TODO: If it still needed to be used, then make ModeCPU externally accessible, so that it can be used to reset the CPU.
      // Set rounding for floating-point math to double precision for x86/x64 processors under GCC.
         fp_control_t ModeCPU; _FPU_GETCW(ModeCPU);
         _FPU_SETCW(ModeCPU & ~(_FPU_EXTENDED | _FPU_SINGLE) | FPU_DOUBLE);
# endif
      }
#endif
   // Set the initialization flag.
      _ae_cpuid_initialized = true;
   }
   return (ae_cpuid_t)(
      (_ae_cpuid_has_sse2 ? (int)CPU_SSE2 : 0) |
      (_ae_cpuid_has_avx2 ? (int)CPU_AVX2 : 0) |
      (_ae_cpuid_has_fma ? (int)CPU_FMA : 0)
   );
}
const/* AutoS */ae_cpuid_t CurCPU = ae_cpuid();
// The number of cores in the system: values < 1 may be returned on error.
ae_int_t ae_cores_count() {
#if AE_OS == AE_POSIX
// sysconf() returns -1 on error, hence the "values < 1 may be returned on error" contract above.
   return sysconf(_SC_NPROCESSORS_ONLN);
#elif AE_OS == AE_WINDOWS
   SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo);
   return (ae_int_t)sysInfo.dwNumberOfProcessors;
#else
// Unknown OS: assume a single core.
   return 1;
#endif
}
// The maximum concurrency on the given system, with the given compilation settings.
// Set to 1 on error or if fast kernels are disabled.
ae_int_t maxconcurrency() {
#if defined ALGLIB_NO_FAST_KERNELS
   return 1;
#else
// ae_cores_count() may return any value < 1 on error (its contract says "values < 1", not "< 0"),
// so clamp everything below 1 to 1 — the old "Cores < 0" test let a 0 leak through,
// contradicting the "Set to 1 on error" contract above.
   ae_int_t Cores = ae_cores_count(); return Cores < 1 ? 1 : Cores;
#endif
}
// Map the nworkers number (which can be positive, zero for "all cores" or negative, e.g. -1 meaning "all cores -1")
// to an "effective", strictly positive workers count.
//
// This is meant for use by debugging/testing code which tests different number of worker threads.
// It is NOT aligned in any way with the ALGLIB++ multithreading framework
// (i.e. it can return a non-zero worker count even for single-threaded GPLed ALGLIB++).
ae_int_t ae_get_effective_workers(ae_int_t nworkers) {
// Determine the number of cores available (compile-time override first).
#if defined AE_NWORKERS
   ae_int_t ncores = AE_NWORKERS;
#else
   ae_int_t ncores = ae_cores_count();
#endif
   AE_CRITICAL_ASSERT(ncores >= 1);
// Positive requests are capped at the core count.
   if (nworkers >= 1) return nworkers > ncores ? ncores : nworkers;
// Zero or negative requests mean "all cores plus nworkers", never less than one worker.
   ae_int_t effective = ncores + nworkers;
   return effective >= 1 ? effective : 1;
}
// Debug counters and flags.
static ae_int_t _dbg_alloc_total = 0;
static bool _use_dbg_counters = false;
static bool _use_vendor_kernels = true;
static bool debug_workstealing = false; // Debug work-stealing environment? false by default.
static ae_int_t dbgws_pushroot_ok = 0;
static ae_int_t dbgws_pushroot_failed = 0;
// Standard function wrappers for better GLIBC portability
#if defined X_FOR_LINUX
// Bind exp/log/pow to the GLIBC 2.2.5 symbol versions so binaries built here run on older GLIBC installs.
__asm__(".symver exp,exp@GLIBC_2.2.5");
__asm__(".symver log,log@GLIBC_2.2.5");
__asm__(".symver pow,pow@GLIBC_2.2.5");
// NOTE(review): the __wrap_* names suggest these are targets of "ld --wrap=exp" etc. — confirm against the build flags.
double __wrap_exp(double x) { return exp(x); }
double __wrap_log(double x) { return log(x); }
double __wrap_pow(double x, double y) { return pow(x, y); }
#endif
// Read the debug counter/flag identified by id; unknown ids return 0.
ae_int64_t ae_get_dbg_value(debug_flag_t id) {
   switch (id) {
   case _ALGLIB_ALLOC_COUNTER: return _alloc_counter;
   case _ALGLIB_TOTAL_ALLOC_SIZE: return _dbg_alloc_total;
   case _ALGLIB_TOTAL_ALLOC_COUNT: return _alloc_counter_total;
#if defined AE_MKL
   case _ALGLIB_VENDOR_MEMSTAT: return ae_mkl_memstat();
#endif
// Work-stealing counters.
#if defined AE_SMP
   case _ALGLIB_WSDBG_NCORES: return ae_cores_count();
#endif
   case _ALGLIB_WSDBG_PUSHROOT_OK: return dbgws_pushroot_ok;
   case _ALGLIB_WSDBG_PUSHROOT_FAILED: return dbgws_pushroot_failed;
#if defined AE_SMP
   case _ALGLIB_CORES_COUNT: return ae_cores_count();
#endif
   case _ALGLIB_GLOBAL_THREADING: return ae_get_global_threading();
   case _ALGLIB_NWORKERS: return _alglib_cores_to_use;
// Unknown value.
   default: return 0;
   }
}
// Set the debug counter/flag identified by flag_id; unknown ids are silently ignored.
void ae_set_dbg_value(debug_flag_t flag_id, ae_int64_t flag_val) {
   switch (flag_id) {
   case _ALGLIB_ALLOC_COUNTER: _use_alloc_counter = flag_val != 0; break;
   case _ALGLIB_TOTAL_ALLOC_SIZE: _use_dbg_counters = flag_val != 0; break;
   case _ALGLIB_USE_VENDOR_KERNELS: _use_vendor_kernels = flag_val != 0; break;
   case _ALGLIB_DEBUG_WORKSTEALING: debug_workstealing = flag_val != 0; break;
   case _ALGLIB_GLOBAL_THREADING: ae_set_global_threading((ae_uint64_t)flag_val); break;
   case _ALGLIB_NWORKERS: _alglib_cores_to_use = (ae_int_t)flag_val; break;
   }
}
// A wrapper around OS-dependent clock routines.
#if AE_OS == AE_POSIX || defined AE_DEBUG4POSIX
// Milliseconds since the Unix epoch, truncated to ae_int_t.
ae_int_t tickcount() {
   struct timeval now;
   ae_int64_t r, v;
   gettimeofday(&now, NULL);
   v = now.tv_sec;
   r = v * 1000;
   v = now.tv_usec / (suseconds_t)1000;
   r += v;
   return (ae_int_t)r;
# if 0
// NOTE(review): dead alternative based on the monotonic clock; kept for reference only.
   struct timespec now;
   return clock_gettime(CLOCK_MONOTONIC, &now) ? 0 : now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;
# endif
}
#elif AE_OS == AE_WINDOWS || defined AE_DEBUG4WINDOWS
// Milliseconds since system start (GetTickCount() wraps about every 49.7 days).
ae_int_t tickcount() {
   return (ae_int_t)GetTickCount();
}
#else
// No OS-specific clock available: always report 0.
ae_int_t tickcount() {
   return 0;
}
#endif
// Return ptr's offset from the previous alignment-byte boundary (0 when already aligned).
ae_int_t ae_misalignment(const void *ptr, size_t alignment) {
// View the pointer as a pointer-sized integer through a union (ae_int_t is pointer-sized, see the
// compile-time checks at the top of this file), then reduce modulo the alignment.
   union {
      const void *as_ptr;
      ae_int_t as_int;
   } view;
   view.as_ptr = ptr;
   return view.as_int % (ae_int_t)alignment;
}
void *ae_align(void *ptr, size_t alignment) {
char *result = (char *)ptr;
if ((result - (char *)NULL) % alignment != 0)
result += alignment - (result - (char *)NULL) % alignment;
return result;
}
// The "optional atomics" functions:
// functions which either perform atomic changes - or fall back to an ordinary operation,
// if the current compiler settings cannot generate atomic code.
// They are all synchronized, i.e. either all of them work - or none of them do.
// Perform atomic addition on a pointer-sized and pointer-size aligned value.
// NOTE:
// * This function is not keyed in for high performance, so use it only when necessary.
static void ae_optional_atomic_add_i(ae_int_t *p, ae_int_t v) {
   AE_CRITICAL_ASSERT(ae_misalignment(p, sizeof(void *)) == 0);
// Fixed: the version guard previously read "100 * __GNUC__ + __GNUC__ >= 470", which ignores the
// minor version (it is just 101 * major) and wrongly excluded GCC 4.7-4.9 from the atomic path.
// The __atomic builtins were introduced in GCC 4.7, i.e. 100 * major + minor >= 407.
#if AE_COMPILER == AE_GNUC && AE_CPU == AE_INTEL && 100 * __GNUC__ + __GNUC_MINOR__ >= 407
   __atomic_add_fetch(p, v, __ATOMIC_RELAXED);
#elif defined __clang__ && AE_CPU == AE_INTEL
   __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
#elif AE_OS == AE_WINDOWS
// No atomic add primitive assumed here: emulate it with a compare-and-swap retry loop.
   while (true) {
   // Convert between ae_int_t * and void ** without compiler warnings about indirection levels.
      union {
         PVOID volatile *volatile ptr;
         volatile ae_int_t *volatile iptr;
      } u;
      u.iptr = p;
   // Atomic read the initial value, convert it to a 1-byte pointer, then increment and store it.
      PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
      if (InterlockedCompareExchangePointer(u.ptr, (PVOID)((char *)v0 + v), v0) == v0) break;
   }
#else
   *p += v; // At least do something for older compilers!
#endif
}
// Perform atomic subtraction on a pointer-sized and pointer-size aligned value.
// NOTE:
// * This function is not keyed in for high performance, so use it only when necessary.
static void ae_optional_atomic_sub_i(ae_int_t *p, ae_int_t v) {
   AE_CRITICAL_ASSERT(ae_misalignment(p, sizeof(void *)) == 0);
// Fixed: the version guard previously read "100 * __GNUC__ + __GNUC__ >= 470", which ignores the
// minor version (it is just 101 * major) and wrongly excluded GCC 4.7-4.9 from the atomic path.
// The __atomic builtins were introduced in GCC 4.7, i.e. 100 * major + minor >= 407.
#if AE_COMPILER == AE_GNUC && AE_CPU == AE_INTEL && 100 * __GNUC__ + __GNUC_MINOR__ >= 407
   __atomic_sub_fetch(p, v, __ATOMIC_RELAXED);
#elif defined __clang__ && AE_CPU == AE_INTEL
   __atomic_fetch_sub(p, v, __ATOMIC_RELAXED);
#elif AE_OS == AE_WINDOWS
// No atomic subtract primitive assumed here: emulate it with a compare-and-swap retry loop.
   while (true) {
   // Convert between ae_int_t * and void ** without compiler warnings about indirection levels.
      union {
         PVOID volatile *volatile ptr;
         volatile ae_int_t *volatile iptr;
      } u;
      u.iptr = p;
   // Atomic read the initial value, convert it to a 1-byte pointer, then decrement and store it.
      PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
      if (InterlockedCompareExchangePointer(u.ptr, (PVOID)((char *)v0 - v), v0) == v0) break;
   }
#else
   *p -= v; // At least do something for older compilers!
#endif
}
#if AE_MALLOC == AE_BASIC_STATIC_MALLOC
// Fields for memory allocation over static array.
# if AE_THREADING != NonTH
# error Basic static malloc is thread-unsafe; define AE_THREADING = NonTH to prove that you know it.
# endif
static ae_int_t sm_page_size = 0;
static ae_int_t sm_page_cnt = 0;
static ae_int_t *sm_page_tbl = NULL;
static unsigned char *sm_mem = NULL;
// Configure the static memory pool over the caller-provided buffer [ptr, ptr + size).
// May be called only once, before any allocation.
void set_memory_pool(void *ptr, size_t size) {
// Integrity checks: the pool must not have been configured yet, and the buffer must be non-empty.
   AE_CRITICAL_ASSERT(sm_page_size == 0);
   AE_CRITICAL_ASSERT(sm_page_cnt == 0);
   AE_CRITICAL_ASSERT(sm_page_tbl == NULL);
   AE_CRITICAL_ASSERT(sm_mem == NULL);
   AE_CRITICAL_ASSERT(size > 0);
// Align the pointer.
   size -= ae_misalignment(ptr, sizeof *sm_page_tbl);
   ptr = ae_align(ptr, sizeof *sm_page_tbl);
// Calculate the page size and page count, prepare pointers to the page table and memory.
   sm_page_size = 0x100;
// We expect to have memory for at least one page + table entry + alignment.
   AE_CRITICAL_ASSERT(size >= (sm_page_size + sizeof *sm_page_tbl) + sm_page_size);
   sm_page_cnt = (size - sm_page_size) / (sm_page_size + sizeof *sm_page_tbl);
   AE_CRITICAL_ASSERT(sm_page_cnt > 0);
   sm_page_tbl = (ae_int_t *)ptr;
   sm_mem = (unsigned char *)ae_align(sm_page_tbl + sm_page_cnt, sm_page_size);
// Mark all pages as free.
   memset(sm_page_tbl, 0, sm_page_cnt * sizeof *sm_page_tbl);
}
// Allocate size bytes from the static pool (first-fit over whole pages); returns NULL on failure.
// Page-table encoding: 0 == free page; n > 0 == first page of an n-page block; -1 == interior page of a block.
static void *ae_static_malloc(size_t size, size_t alignment) {
   AE_CRITICAL_ASSERT(size >= 0);
   AE_CRITICAL_ASSERT(sm_page_size > 0);
   AE_CRITICAL_ASSERT(sm_page_cnt > 0);
   AE_CRITICAL_ASSERT(sm_page_tbl != NULL);
   AE_CRITICAL_ASSERT(sm_mem != NULL);
   if (size == 0 || _force_malloc_failure) return NULL;
// Check that the page alignment and requested alignment match each other.
   AE_CRITICAL_ASSERT(alignment <= sm_page_size);
   AE_CRITICAL_ASSERT((sm_page_size % alignment) == 0);
// Search a long enough sequence of pages.
   int rq_pages = size / sm_page_size;
   if (size % sm_page_size) rq_pages++;
   int cur_len = 0;
   for (int i = 0; i < sm_page_cnt; ) {
   // Determine the length of the free page sequence.
      if (sm_page_tbl[i] == 0) cur_len++;
      else {
         AE_CRITICAL_ASSERT(sm_page_tbl[i] > 0);
         cur_len = 0;
      // Allocated run: skip the whole block in one step.
         i += sm_page_tbl[i];
         continue;
      }
   // Found it? (Here i is the LAST page of the candidate run.)
      if (cur_len >= rq_pages) {
      // Update whichever counters the use-flags are set for.
         if (_use_alloc_counter) {
            ae_optional_atomic_add_i(&_alloc_counter, 1);
            ae_optional_atomic_add_i(&_alloc_counter_total, 1);
         }
         if (_use_dbg_counters) ae_optional_atomic_add_i(&_dbg_alloc_total, size);
      // Mark pages and return.
         for (int j = 0; j < rq_pages; j++) sm_page_tbl[i - j] = -1;
         sm_page_tbl[i - (rq_pages - 1)] = rq_pages;
         return sm_mem + (i - (rq_pages - 1)) * sm_page_size;
      }
   // The next page.
      i++;
   }
   return NULL;
}
// Return a block previously obtained from ae_static_malloc() to the pool; NULL is a no-op.
static void ae_static_free(void *block) {
   if (block == NULL) return;
// Recover the index of the block's first page and validate it against the pool geometry.
   ae_int_t page_idx = (unsigned char *)block - sm_mem;
   AE_CRITICAL_ASSERT(page_idx >= 0);
   AE_CRITICAL_ASSERT((page_idx % sm_page_size) == 0);
   page_idx /= sm_page_size;
   AE_CRITICAL_ASSERT(page_idx < sm_page_cnt);
   ae_int_t page_cnt = sm_page_tbl[page_idx];
   AE_CRITICAL_ASSERT(page_cnt >= 1);
// Mark every page of the block as free again.
   for (ae_int_t i = 0; i < page_cnt; i++) sm_page_tbl[page_idx + i] = 0;
// Update the counters (if the use-flag is set).
   if (_use_alloc_counter) ae_optional_atomic_sub_i(&_alloc_counter, 1);
}
// Report static pool usage; both outputs are in bytes (page counts scaled by sm_page_size).
void memory_pool_stats(ae_int_t *bytes_used, ae_int_t *bytes_free) {
   AE_CRITICAL_ASSERT(sm_page_size > 0);
   AE_CRITICAL_ASSERT(sm_page_cnt > 0);
   AE_CRITICAL_ASSERT(sm_page_tbl != NULL);
   AE_CRITICAL_ASSERT(sm_mem != NULL);
// Scan the page table, counting pages (converted to bytes at the end).
   *bytes_free = *bytes_used = 0;
   for (int i = 0; i < sm_page_cnt; ) {
      if (sm_page_tbl[i] == 0) {
         ++*bytes_free;
         i++;
      } else {
         AE_CRITICAL_ASSERT(sm_page_tbl[i] > 0);
      // Allocated block: its first page stores the page count; skip the whole block.
         *bytes_used += sm_page_tbl[i];
         i += sm_page_tbl[i];
      }
   }
   *bytes_used *= sm_page_size;
   *bytes_free *= sm_page_size;
}
#endif
// Allocate size bytes aligned to alignment; returns NULL when size == 0, on simulated failure, or on malloc() failure.
// Heap layout: [malloc block .. saved pointers .. (redzone0) user data (redzone1)] — the raw malloc()
// pointer is stashed immediately before the user-visible region (see aligned_extract_ptr()).
void *aligned_malloc(size_t size, size_t alignment) {
#if AE_MALLOC == AE_BASIC_STATIC_MALLOC
   return ae_static_malloc(size, alignment);
#else
   if (size == 0 || _force_malloc_failure || _malloc_failure_after > 0 && _alloc_counter_total >= _malloc_failure_after) return NULL;
// Allocate, making the appropriate padding adjustments for any alignment > 1.
   size_t alloc_size = 2 * sizeof(void *) + size;
   if (alignment > 1) alloc_size += alignment - 1;
# if defined ALGLIB_REDZONE
   alloc_size += 2 * (ALGLIB_REDZONE);
# endif
   void *block = malloc(alloc_size); if (block == NULL) return NULL;
   char *result = (char *)block + 2 * sizeof block;
   result = (char *)ae_align(result, alignment);
   *(void **)(result - sizeof block) = block;
# if defined ALGLIB_REDZONE
// Red zones bracket the user block; the end-zone address is stashed next to the malloc() pointer
// so aligned_free() can verify both zones.
   char *redzone0 = result;
   result = redzone0 + (ALGLIB_REDZONE);
   char *redzone1 = result + size;
   ae_assert(ae_misalignment(result, alignment) == 0, "ALGLIB: improperly configured red zone size - is not multiple of the current alignment");
   *(void **)(redzone0 - 2 * sizeof block) = redzone1;
   memset(redzone0, _ALGLIB_REDZONE_VAL, ALGLIB_REDZONE);
   memset(redzone1, _ALGLIB_REDZONE_VAL, ALGLIB_REDZONE);
# endif
// Update whichever counters the use-flags are set for.
   if (_use_alloc_counter) {
      ae_optional_atomic_add_i(&_alloc_counter, 1);
      ae_optional_atomic_add_i(&_alloc_counter_total, 1);
   }
   if (_use_dbg_counters) ae_optional_atomic_add_i(&_dbg_alloc_total, (ae_int_t)size);
// Return the result as a generic pointer.
   return (void *)result;
#endif
}
// Recover the raw malloc() pointer that aligned_malloc() stashed just before the user-visible block
// (offset by the leading red zone when that is enabled); NULL maps to NULL.
static void *aligned_extract_ptr(void *block) {
#if AE_MALLOC == AE_BASIC_STATIC_MALLOC
   return NULL;
#elif defined ALGLIB_REDZONE
   return block == NULL ? NULL : *(void **)((char *)block - (ALGLIB_REDZONE) - sizeof block);
#else
   return block == NULL ? NULL : *(void **)((char *)block - sizeof block);
#endif
}
// Free a block previously obtained from aligned_malloc(); NULL is a no-op.
// When ALGLIB_REDZONE is enabled, both guard zones are verified before the memory is released.
void aligned_free(void *block) {
#if AE_MALLOC == AE_BASIC_STATIC_MALLOC
   ae_static_free(block);
#else
// Handle NULL input.
   if (block == NULL) return;
// If red zone is activated, check it before deallocation.
# if defined ALGLIB_REDZONE
   char *redzone0 = (char *)block - (ALGLIB_REDZONE);
   char *redzone1 = (char *)*(void **)(redzone0 - 2 * sizeof block);
   for (ae_int_t i = 0; i < (ALGLIB_REDZONE); i++) {
      if (redzone0[i] != _ALGLIB_REDZONE_VAL) {
         const char *msg = "ALGLIB: red zone corruption is detected (write prior to the block beginning?)";
         fprintf(stderr, "%s\n", msg);
      // Fixed: ae_assert() takes (cond, msg) in this file (see its definition above and the 2-argument
      // call in aligned_malloc()); the old 3-argument call with a trailing NULL was a leftover from
      // the original ALGLIB ae_state-based API and did not match the declaration.
         ae_assert(false, msg);
      }
      if (redzone1[i] != _ALGLIB_REDZONE_VAL) {
         const char *msg = "ALGLIB: red zone corruption is detected (write past the end of the block?)";
         fprintf(stderr, "%s\n", msg);
         ae_assert(false, msg);
      }
   }
# endif
// Free the memory and optionally update allocation counters.
   free(aligned_extract_ptr(block));
   if (_use_alloc_counter) ae_optional_atomic_sub_i(&_alloc_counter, 1);
#endif
}
// Allocate size bytes with automatic alignment (AE_DATA_ALIGN).
// Returns NULL when size == 0.
// On allocation failure: raises via ae_break() when a frame is active
// (TopFr != NULL), otherwise returns NULL.
void *ae_malloc(size_t size) {
   if (size == 0) return NULL;
   void *block = aligned_malloc(size, AE_DATA_ALIGN);
   if (block != NULL) return block;
   // Allocation failed: report through the frame machinery when available.
   if (TopFr != NULL) ae_break(ERR_OUT_OF_MEMORY, "ae_malloc: out of memory");
   return NULL;
}
// Allocate size bytes with automatic alignment and zero-fill them.
// Returns NULL when size == 0.
// Error handling (inherited from ae_malloc()):
// * if TopFr == NULL, return NULL on allocation error,
// * if TopFr != NULL, call ae_break() on allocation error.
static void *ae_malloc_zero(size_t size) {
   void *block = ae_malloc(size);
   if (block == NULL) return NULL;
   memset(block, 0, size);
   return block;
}
// Release memory previously obtained from ae_malloc(); NULL is a no-op.
void ae_free(void *p) {
   if (p == NULL) return;
   aligned_free(p);
}
// Attach block to the dynamic block list for the ALGLIB++ environment
// by pushing it onto the front of the TopFr-rooted singly-linked list.
// This function does NOT generate exceptions.
// NOTE:
// * Avoid calling it for the special blocks which mark frame boundaries!
static void ae_db_attach(ae_dyn_block *block) {
   block->p_next = TopFr;
   TopFr = block;
}
// Allocate and initialize a dynamic block of size >= 0 bytes for the ALGLIB++ environment.
// It is assumed to be uninitialized, its fields are ignored.
// make_automatic indicates that the block is to be added to the dynamic block list.
// Upon allocation failure with TopFr != NULL, call ae_break(), leaving block in a valid (but empty) state.
// NOTES:
// * Avoid calling it for blocks which are already in the list.
//   Use ae_db_realloc() for already-allocated blocks.
// * No memory allocation is performed for initialization with size == 0.
void ae_db_init(ae_dyn_block *block, size_t size, bool make_automatic) {
   //(@) TopFr != NULL check and zero-check removed.
   // NOTE:
   // * These strange dances around block->ptr are necessary in order to correctly handle possible exceptions during memory allocation.
   // * The former ae_assert(size >= 0, ...) was removed: size is size_t (unsigned), so the check was a tautology.
   block->ptr = NULL;
   // Set the deallocator BEFORE attaching/allocating, so the block is fully
   // valid even if ae_malloc() raises via ae_break() and the frame is unwound
   // (previously deallocator stayed uninitialized on that path).
   block->deallocator = ae_free;
   if (make_automatic) ae_db_attach(block); else block->p_next = NULL;
   if (size != 0) block->ptr = ae_malloc(size);
}
// Reallocate the dynamic block (assumed to be initialized) to size bytes for the ALGLIB++ environment.
// Delete the old contents but preserve the automatic state.
// Upon allocation failure with TopFr != NULL, call ae_break(), leaving block in a valid (but empty) state.
// NOTE:
// * Avoid calling it for the special blocks which mark frame boundaries!
void ae_db_realloc(ae_dyn_block *block, ae_int_t size) {
   //(@) TopFr != NULL check removed.
   // NOTE:
   // * These strange dances around block->ptr are necessary in order to correctly handle possible exceptions during memory allocation.
   ae_assert(size >= 0, "ae_db_realloc: negative size");
   // Free the old payload and clear the pointer FIRST: if ae_malloc() raises
   // below, the block is then left in a valid, empty state (no dangling pointer).
   if (block->ptr != NULL) block->deallocator(block->ptr), block->ptr = NULL;
   block->ptr = ae_malloc((size_t)size);
   block->deallocator = ae_free;
}
// Clear the dynamic block (assumed to be initialized), releasing all dynamically allocated memory.
// The dynamic block may be in the automatic management list - in this case it will NOT be removed from the list.
// NOTE:
// * Avoid calling it for the special blocks which mark frame boundaries!
void ae_db_free(ae_dyn_block *block) {
   if (block->ptr != NULL) {
      block->deallocator(block->ptr);
      block->ptr = NULL;
   }
   // Reset the deallocator to the default, ready for a future ae_db_realloc().
   block->deallocator = ae_free;
}
// Swap dynamic blocks block1 and block2 (pointers and deallocators)
// leaving other parameters (automatic management settings, etc.) unchanged.
// NOTE:
// * Avoid calling it for the special blocks which mark frame boundaries!
void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2) {
   // Only the payload pointer and its deallocator are exchanged;
   // the p_next linkage stays with each block.
   void *volatile tmp_ptr = block1->ptr;
   ae_deallocator tmp_free = block1->deallocator;
   block1->ptr = block2->ptr;
   block1->deallocator = block2->deallocator;
   block2->ptr = tmp_ptr;
   block2->deallocator = tmp_free;
}
// The size of datatype or zero for dynamic types like strings or multiple precision types.
ae_int_t ae_sizeof(ae_datatype datatype) {
   // DT_BYTE shares the DT_BOOL encoding, so no separate branch is needed.
   if (datatype == DT_BOOL) return (ae_int_t)sizeof(bool);
   if (datatype == DT_INT) return (ae_int_t)sizeof(ae_int_t);
   if (datatype == DT_REAL) return (ae_int_t)sizeof(double);
   if (datatype == DT_COMPLEX) return 2 * (ae_int_t)sizeof(double);
   return 0;
}
// Make dst into a new datatype ae_vector of size >= 0.
// Its contents are assumed to be uninitialized, and its fields are ignored.
// make_automatic indicates whether or not the vector is to be added to the dynamic block list.
// Upon allocation failure or size < 0, call ae_break().
// NOTE:
// * No memory allocation is performed for initialization with size == 0.
void ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, bool make_automatic) {
   // Integrity checks.
   //(@) TopFr != NULL check and zero-check removed.
   ae_assert(size >= 0, "ae_vector_init: negative size");
   // Prepare for possible errors during allocation:
   // if ae_db_init() raises, dst is already in a valid empty state.
   dst->cnt = 0;
   dst->xX = NULL;
   // Initialize.
   // NOTE(review): size * ae_sizeof(datatype) is unchecked ae_int_t arithmetic and
   // may overflow for huge sizes - confirm callers guard against this upstream.
   ae_db_init(&dst->data, (size_t)(size * ae_sizeof(datatype)), make_automatic);
   dst->cnt = size;
   dst->datatype = datatype;
   dst->xX = dst->data.ptr;
   dst->is_attached = false;
}
// Copy ae_vector src into ae_vector dst.
// dst is assumed to be uninitialized, its fields are ignored.
// The fields copied to dst are to be managed and owned by dst.
// make_automatic indicates whether or not the vector is to be added to the dynamic block list.
// Upon allocation failure, call ae_break().
void ae_vector_copy(ae_vector *dst, const ae_vector *src, bool make_automatic) {
   //(@) TopFr != NULL check removed.
   ae_vector_init(dst, src->cnt, src->datatype, make_automatic);
   if (src->cnt == 0) return;
   size_t nbytes = (size_t)(src->cnt * ae_sizeof(src->datatype));
   memmove(dst->xX, src->xX, nbytes);
}
// Resize the ae_vector dst to size newsize >= 0.
// dst must be initialized.
// Its contents are freed by setlength().
// Upon allocation failure with TopFr != NULL, call ae_break(), otherwise return an indication of success or failure.
void ae_vector_set_length(ae_vector *dst, ae_int_t newsize) {
   //(@) TopFr != NULL check removed.
   ae_assert(newsize >= 0, "ae_vector_set_length: negative size");
   // Size unchanged: keep the existing storage (this is the only case where contents survive).
   if (dst->cnt == newsize) return;
   // Reallocate, preparing first for possible errors:
   // clear cnt/xX so dst stays valid (empty) if ae_db_realloc() raises.
   dst->cnt = 0;
   dst->xX = NULL;
   ae_db_realloc(&dst->data, newsize * ae_sizeof(dst->datatype));
   dst->cnt = newsize;
   dst->xX = dst->data.ptr;
}
// Resize the ae_vector dst to size newsize >= 0, preserving previously existing elements.
// dst must be initialized.
// The values of elements added during vector growth are undefined.
// Upon allocation failure, call ae_break().
void ae_vector_resize(ae_vector *dst, ae_int_t newsize) {
   // Copy-and-swap: build a fresh vector of the target size, copy the
   // surviving prefix into it, swap it into dst, then drop the old storage.
   ae_vector buf;
   memset(&buf, 0, sizeof buf);
   ae_vector_init(&buf, newsize, dst->datatype, false);
   ae_int_t keep = dst->cnt < newsize ? dst->cnt : newsize;
   ae_int_t nbytes = keep * ae_sizeof(dst->datatype);
   if (nbytes > 0) memmove(buf.xX, dst->xX, nbytes);
   ae_swap_vectors(dst, &buf);
   ae_vector_free(&buf, true);
}
// The "FREE" functionality for vector dst (cleared contents and freeing all internal structures).
// Clear vector dst (releasing all dynamically allocated memory).
// dst may be on the frame - in which case it will NOT be removed from the list.
// IMPORTANT:
// * This function does NOT invalidate dst; it just releases all dynamically allocated storage,
// but dst still may be used after calling ae_vector_set_length().
void ae_vector_free(ae_vector *dst, bool/* make_automatic*/) {
dst->cnt = 0;
ae_db_free(&dst->data);
dst->xX = 0;
dst->is_attached = false;
}
// Efficiently swap ae_vector vec1 with ae_vector vec2, leaving other pararemeters (automatic management, etc.) intact.
void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2) {
   ae_assert(!vec1->is_attached, "ae_swap_vectors: internal error, attempt to swap vectors attached to X-object");
   ae_assert(!vec2->is_attached, "ae_swap_vectors: internal error, attempt to swap vectors attached to X-object");
   // Swap the underlying storage blocks, then the cached fields, one at a time.
   ae_db_swap(&vec1->data, &vec2->data);
   ae_int_t tmp_cnt = vec1->cnt; vec1->cnt = vec2->cnt; vec2->cnt = tmp_cnt;
   ae_datatype tmp_dt = vec1->datatype; vec1->datatype = vec2->datatype; vec2->datatype = tmp_dt;
   void *tmp_x = vec1->xX; vec1->xX = vec2->xX; vec2->xX = tmp_x;
}
// Lay out the raster for matrix dst from storage.
// * dst must be a correctly initialized matrix.
// * dst->data.ptr points to the beginning of memory block allocated for row pointers.
// * dst->ptr - undefined (initialized during algorithm processing).
// * storage points to the beginning of actual storage.
static void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage) {
   // Degenerate matrix: no rows to point at.
   if (dst->cols <= 0 || dst->rows <= 0) {
      dst->xyX = NULL;
      return;
   }
   void **row_table = (void **)dst->data.ptr;
   dst->xyX = row_table;
   // Each row is stride elements apart in the element area.
   char *row = (char *)storage;
   ae_int_t row_bytes = dst->stride * ae_sizeof(dst->datatype);
   for (ae_int_t i = 0; i < dst->rows; i++) {
      row_table[i] = row;
      row += row_bytes;
   }
}
// Make dst into a new rows x cols datatype ae_matrix.
// The matrix size may be zero, in such cases both cols and rows will be zero.
// Its contents are assumed to be uninitialized, and its fields are ignored.
// make_automatic indicates whether or not the matrix is to be added to the dynamic block list,
// as opposed to being a global object or field of some other object.
// Upon allocation failure or cols < 0 or rows < 0, call ae_break().
// NOTE:
// * No memory allocation is performed for initialization with cols == 0 or rows == 0.
void ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, bool make_automatic) {
   //(@) TopFr != NULL check and zero-check removed.
   ae_assert(cols >= 0 && rows >= 0, "ae_matrix_init: negative length");
   // If either cols or rows is 0, then they both must be made so.
   if (cols == 0) rows = 0; else if (rows == 0) cols = 0;
   // Initialize.
   dst->datatype = datatype;
   dst->stride = cols;
   // Prepare for possible errors during allocation:
   // if ae_db_init() raises, dst is already in a valid empty state.
   dst->rows = dst->cols = 0;
   dst->xyX = NULL;
   dst->is_attached = false;
   // If cols and rows are 0; perform a quick exit.
   if (cols == 0 || rows == 0) { ae_db_init(&dst->data, 0, make_automatic); return; }
   // Initialize, preparing for possible errors during allocation.
   // Round the stride up until each row occupies a multiple of AE_DATA_ALIGN bytes.
   for (; dst->stride * ae_sizeof(datatype) % AE_DATA_ALIGN != 0; dst->stride++);
   // One allocation holds: rows row-pointers, then the element area;
   // the AE_DATA_ALIGN - 1 slack bytes allow aligning the element area after the pointer table.
   // NOTE(review): the size expression is unchecked ae_int_t arithmetic and may
   // overflow for huge matrices - confirm callers guard dimensions upstream.
   ae_db_init(&dst->data, (size_t)(rows * (sizeof(void *) + dst->stride * ae_sizeof(datatype)) + AE_DATA_ALIGN - 1), make_automatic);
   dst->cols = cols;
   dst->rows = rows;
   // Set the pointers to the matrix rows.
   ae_matrix_update_row_pointers(dst, ae_align((char *)dst->data.ptr + rows * sizeof(void *), AE_DATA_ALIGN));
}
// Copy ae_matrix src to ae_matrix dst.
// dst is assumed to be uninitialized, its fields are ignored.
// make_automatic indicates whether or not dst is to be added to the dynamic block list,
// as opposed to being a global object or field of some other object.
// Upon allocation failure, call ae_break().
void ae_matrix_copy(ae_matrix *dst, const ae_matrix *src, bool make_automatic) {
   ae_matrix_init(dst, src->rows, src->cols, src->datatype, make_automatic);
   if (src->cols <= 0 || src->rows <= 0) return;
   if (dst->stride == src->stride) {
      // Identical layout: one contiguous copy covers all rows (padding included).
      memmove(dst->xyX[0], src->xyX[0], (size_t)(src->rows * src->stride * ae_sizeof(src->datatype)));
   } else {
      // Strides differ: copy row by row, skipping the padding.
      size_t row_bytes = (size_t)(dst->cols * ae_sizeof(dst->datatype));
      for (ae_int_t i = 0; i < dst->rows; i++)
         memmove(dst->xyX[i], src->xyX[i], row_bytes);
   }
}
// Resize ae_matrix dst to size rows x cols.
// Either cols, rows or both may be 0.
// The matrix dst must be initialized.
// Its contents are freed after setlength().
// Upon allocation failure with TopFr != NULL, call ae_break(), otherwise return an indication of success or failure.
void ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols) {
   //(@) TopFr != NULL check removed.
   ae_assert(cols >= 0 && rows >= 0, "ae_matrix_set_length: negative length");
   // Same dimensions: keep the existing storage untouched.
   if (dst->cols == cols && dst->rows == rows) return;
   // Prepare the stride: round up until each row occupies a multiple of AE_DATA_ALIGN bytes.
   for (dst->stride = cols; dst->stride * ae_sizeof(dst->datatype) % AE_DATA_ALIGN != 0; dst->stride++);
   // Prepare for possible errors during reallocation:
   // clear rows/cols/xyX so dst stays valid (empty) if ae_db_realloc() raises.
   dst->rows = dst->cols = 0;
   dst->xyX = NULL;
   // Allocation holds rows row-pointers, the element area, and alignment slack.
   // NOTE(review): unlike ae_matrix_init(), this does not normalize rows/cols to 0
   // together when one of them is 0 (e.g. rows=5, cols=0 leaves rows==5 with xyX==NULL) -
   // confirm callers never iterate rows of a zero-column matrix.
   ae_db_realloc(&dst->data, rows * ((ae_int_t)sizeof(void *) + dst->stride * ae_sizeof(dst->datatype)) + AE_DATA_ALIGN - 1);
   dst->cols = cols;
   dst->rows = rows;
   // Set the pointers to the matrix rows.
   ae_matrix_update_row_pointers(dst, ae_align((char *)dst->data.ptr + dst->rows * sizeof(void *), AE_DATA_ALIGN));
}