-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathfmadd.c
64 lines (45 loc) · 2.48 KB
/
fmadd.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
* Author: TripleZ<[email protected]>
* Date: 2018-08-20
*/
#include <immintrin.h>
#include <stdio.h>
/**
 * Demonstrates the FMA (fused multiply-add) intrinsics by computing
 * a * b + c with a = 8, b = 20, c = 2 for each supported vector shape and
 * printing the resulting lanes.
 *
 * Results are copied out of the vector registers with the unaligned store
 * intrinsics rather than by casting the vector's address to a scalar
 * pointer, so lane access does not depend on compiler-specific aliasing
 * rules for vector types.
 *
 * NOTE: the translation unit must be compiled with FMA/AVX code generation
 * enabled (e.g. -mfma on GCC/Clang), and the CPU must support FMA.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 always
 */
int main(int argc, char const *argv[]) {
    (void)argc;
    (void)argv;

    /* Scratch buffers large enough for the widest vector of each type. */
    float flo[8];
    double dou[4];

    // Single-precision multiply and add with 128-bit vectors (FMA)
    __m128 float_128_vec_0 = _mm_set1_ps(8.0f);
    __m128 float_128_vec_1 = _mm_set1_ps(20.0f);
    __m128 float_128_vec_2 = _mm_set1_ps(2.0f);
    __m128 float_128_result =
        _mm_fmadd_ps(float_128_vec_0, float_128_vec_1, float_128_vec_2);
    _mm_storeu_ps(flo, float_128_result);
    printf("float:\t\t%f, %f, %f, %f\n", flo[0], flo[1], flo[2], flo[3]);

    // Double-precision multiply and add with 128-bit vectors (FMA)
    __m128d double_128_vec_0 = _mm_set1_pd(8.0);
    __m128d double_128_vec_1 = _mm_set1_pd(20.0);
    __m128d double_128_vec_2 = _mm_set1_pd(2.0);
    __m128d double_128_result =
        _mm_fmadd_pd(double_128_vec_0, double_128_vec_1, double_128_vec_2);
    _mm_storeu_pd(dou, double_128_result);
    printf("double:\t\t%lf, %lf\n", dou[0], dou[1]);

    // Single-precision multiply and add with 256-bit vectors (FMA)
    __m256 float_256_vec_0 = _mm256_set1_ps(8.0f);
    __m256 float_256_vec_1 = _mm256_set1_ps(20.0f);
    __m256 float_256_vec_2 = _mm256_set1_ps(2.0f);
    __m256 float_256_result =
        _mm256_fmadd_ps(float_256_vec_0, float_256_vec_1, float_256_vec_2);
    _mm256_storeu_ps(flo, float_256_result);
    printf("float:\t\t%f, %f, %f, %f, %f, %f, %f, %f\n",
           flo[0], flo[1], flo[2], flo[3], flo[4], flo[5], flo[6], flo[7]);

    // Double-precision multiply and add with 256-bit vectors (FMA)
    __m256d double_256_vec_0 = _mm256_set1_pd(8.0);
    __m256d double_256_vec_1 = _mm256_set1_pd(20.0);
    __m256d double_256_vec_2 = _mm256_set1_pd(2.0);
    __m256d double_256_result =
        _mm256_fmadd_pd(double_256_vec_0, double_256_vec_1, double_256_vec_2);
    _mm256_storeu_pd(dou, double_256_result);
    printf("double:\t\t%lf, %lf, %lf, %lf\n", dou[0], dou[1], dou[2], dou[3]);

    // Single-precision multiply and add the lowest element with 128-bit
    // vectors (FMA). Only lane 0 holds a * b + c; per the intrinsic's
    // specification the upper lanes are copied from the first operand.
    __m128 float_128_low_result =
        _mm_fmadd_ss(float_128_vec_0, float_128_vec_1, float_128_vec_2);
    _mm_storeu_ps(flo, float_128_low_result);
    printf("float:\t\t%f, %f, %f, %f\n", flo[0], flo[1], flo[2], flo[3]);

    // Double-precision multiply and add the lowest element with 128-bit
    // vectors (FMA). Only lane 0 holds a * b + c; the upper lane is copied
    // from the first operand.
    __m128d double_128_low_result =
        _mm_fmadd_sd(double_128_vec_0, double_128_vec_1, double_128_vec_2);
    _mm_storeu_pd(dou, double_128_low_result);
    printf("double:\t\t%lf, %lf\n", dou[0], dou[1]);

    return 0;
}