forked from OpenMathLib/OpenBLAS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzrotg.c
182 lines (169 loc) · 4.51 KB
/
zrotg.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#include <math.h>
#include <float.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#ifndef CBLAS
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
#else
void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT *DA = (FLOAT*) VDA;
FLOAT *DB = (FLOAT*) VDB;
FLOAT *S = (FLOAT*) VS;
#endif /* CBLAS */
#ifdef DOUBLE
long double safmin = DBL_MIN;
long double rtmin = sqrt(DBL_MIN/DBL_EPSILON);
#else
long double safmin = FLT_MIN;
long double rtmin = sqrt(FLT_MIN/FLT_EPSILON);
#endif
FLOAT da_r = *(DA+0);
FLOAT da_i = *(DA+1);
FLOAT db_r = *(DB+0);
FLOAT db_i = *(DB+1);
//long double r;
FLOAT S1[2];
FLOAT R[2];
long double d;
FLOAT ada = da_r * da_r + da_i * da_i;
FLOAT adb = db_r * db_r + db_i * db_i;
PRINT_DEBUG_NAME;
IDEBUG_START;
FUNCTION_PROFILE_START();
if (db_r == ZERO && db_i == ZERO) {
*C = ONE;
*(S + 0) = ZERO;
*(S + 1) = ZERO;
return;
}
long double safmax = 1./safmin;
#if defined DOUBLE
long double rtmax = safmax /DBL_EPSILON;
#else
long double rtmax = safmax /FLT_EPSILON;
#endif
*(S1 + 0) = *(DB + 0);
*(S1 + 1) = *(DB + 1) *-1;
if (da_r == ZERO && da_i == ZERO) {
*C = ZERO;
if (db_r == ZERO) {
(*DA) = fabsl(db_i);
*S = *S1 /(*DA);
*(S+1) = *(S1+1) /(*DA);
return;
} else if ( db_i == ZERO) {
*DA = fabsl(db_r);
*S = *S1 /(*DA);
*(S+1) = *(S1+1) /(*DA);
return;
} else {
long double g1 = MAX( fabsl(db_r), fabsl(db_i));
rtmax =sqrt(safmax/2.);
if (g1 > rtmin && g1 < rtmax) { // unscaled
d = sqrt(adb);
*S = *S1 /d;
*(S+1) = *(S1+1) /d;
*DA = d ;
*(DA+1) = ZERO;
return;
} else { // scaled algorithm
long double u = MIN ( safmax, MAX ( safmin, g1));
FLOAT gs_r = db_r/u;
FLOAT gs_i = db_i/u;
d = sqrt ( gs_r*gs_r + gs_i*gs_i);
*S = gs_r / d;
*(S + 1) = (gs_i * -1) / d;
*DA = d * u;
*(DA+1) = ZERO;
return;
}
}
} else {
FLOAT f1 = MAX ( fabsl(da_r), fabsl(da_i));
FLOAT g1 = MAX ( fabsl(db_r), fabsl(db_i));
rtmax = sqrt(safmax / 4.);
if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax) { //unscaled
long double h = ada + adb;
double adahsq = sqrt(ada * h);
if (ada >= h *safmin) {
*C = sqrt(ada/h);
*R = *DA / *C;
*(R+1) = *(DA+1) / *(C+1);
rtmax *= 2.;
if ( ada > rtmin && h < rtmax) { // no risk of intermediate overflow
*S = *S1 * (*DA / adahsq) - *(S1+1)* (*(DA+1)/adahsq);
*(S+1) = *S1 * (*(DA+1) / adahsq) + *(S1+1) * (*DA/adahsq);
} else {
*S = *S1 * (*R/h) - *(S1+1) * (*(R+1)/h);
*(S+1) = *S1 * (*(R+1)/h) + *(S1+1) * (*(R)/h);
}
} else {
*C = ada / adahsq;
if (*C >= safmin) {
*R = *DA / *C;
*(R+1) = *(DA+1) / *(C+1);
} else {
*R = *DA * (h / adahsq);
*(R+1) = *(DA+1) * (h / adahsq);
}
*S = *S1 * ada / adahsq;
*(S+1) = *(S1+1) * ada / adahsq;
}
*DA=*R;
*(DA+1)=*(R+1);
return;
} else { // scaled
FLOAT fs_r, fs_i, gs_r, gs_i;
long double v,w,f2,g2,h;
long double u = MIN ( safmax, MAX ( safmin, MAX(f1,g1)));
gs_r = db_r/u;
gs_i = db_i/u;
g2 = sqrt ( gs_r*gs_r + gs_i*gs_i);
if (f1 /u < rtmin) {
v = MIN (safmax, MAX (safmin, f1));
w = v / u;
fs_r = *DA/ v;
fs_i = *(DA+1) / v;
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i);
h = f2 * w * w + g2;
} else { // use same scaling for both
w = 1.;
fs_r = *DA/ u;
fs_i = *(DA+1) / u;
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i);
h = f2 + g2;
}
if ( f2 >= h * safmin) {
*C = sqrt ( f2 / h );
*DA = fs_r / *C;
*(DA+1) = fs_i / *C;
rtmax *= 2;
if ( f2 > rtmin && h < rtmax) {
*S = gs_r * (fs_r /sqrt(f2*h)) - gs_i * (fs_i / sqrt(f2*h));
*(S+1) = gs_r * (fs_i /sqrt(f2*h)) + gs_i * -1. * (fs_r / sqrt(f2*h));
} else {
*S = gs_r * (*DA/h) - gs_i * (*(DA+1) / h);
*(S+1) = gs_r * (*(DA+1) /h) + gs_i * -1. * (*DA / h);
}
} else { // intermediates might overflow
d = sqrt ( f2 * h);
*C = f2 /d;
if (*C >= safmin) {
*DA = fs_r / *C;
*(DA+1) = fs_i / *C;
} else {
*DA = fs_r * (h / d);
*(DA+1) = fs_i / (h / d);
}
*S = gs_r * (fs_r /d) - gs_i * (fs_i / d);
*(S+1) = gs_r * (fs_i /d) + gs_i * -1. * (fs_r / d);
}
*C *= w;
*DA *= u;
*(DA+1) *= u;
return;
}
}
}