forked from skeeto/mandel-simd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmandel_altivec.c
76 lines (64 loc) · 2.67 KB
/
mandel_altivec.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <altivec.h>
#include "mandel.h"
/*
 * VF_ALL(x): build a vector float whose four lanes all hold x.
 *
 * There's no one-instruction splat for float, although
 * you can splat into a float from an element in another
 * vector. Given that these are done outside of the innermost
 * loop it might not be worth the effort.
 *
 * Note: x is expanded (and therefore evaluated) four times, so only
 * pass expressions without side effects.
 */
#define VF_ALL(x) ((vector float) { (x), (x), (x), (x) })
/*
 * Render the Mandelbrot set into `image` with AltiVec, four horizontally
 * adjacent pixels per vector.
 *
 * image: output buffer; each pixel is written as 3 identical bytes, rows
 *        are s->width pixels long and there are s->height rows.
 * s:     rendering parameters read here: xlim[0..1], ylim[0..1], width,
 *        height, iterations and depth.
 *
 * Rows are distributed over threads by the OpenMP pragma below.
 */
void
mandel_altivec(unsigned char *image, const struct spec *s)
{
    vector float xmin, ymin, xscale, yscale, iter_scale, depth_scale;
    vector float threshold = VF_ALL(4.0);
    vector float one = VF_ALL(1.0);
    vector float zero = VF_ALL(0.0);

    /* Pixel coordinate -> complex plane: c = pixel * scale + min. */
    xmin = VF_ALL(s->xlim[0]);
    ymin = VF_ALL(s->ylim[0]);
    xscale = VF_ALL((s->xlim[1] - s->xlim[0]) / s->width);
    yscale = VF_ALL((s->ylim[1] - s->ylim[0]) / s->height);
    /* Per-lane count -> [0, 1] -> [0, depth - 1]. */
    iter_scale = VF_ALL(1.0f / s->iterations);
    depth_scale = VF_ALL(s->depth - 1);

    #pragma omp parallel for schedule(dynamic, 1)
    for (int y = 0; y < s->height; y++) {
        for (int x = 0; x < s->width; x += 4) {
            vector float mx = (vector float) { x, x + 1, x + 2, x + 3 };
            vector float my = VF_ALL(y);
            vector float cr = vec_madd(mx, xscale, xmin);
            vector float ci = vec_madd(my, yscale, ymin);
            vector float zr = cr;
            vector float zi = ci;
            int k = 1;
            vector float mk = VF_ALL(1);

            while (++k < s->iterations) {
                /* Compute z1 from z0 */
                vector float zr2cr = vec_madd(zr, zr, cr);
                vector float zi2 = vec_madd(zi, zi, zero);
                vector float zrzi = vec_madd(zr, zi, zero);

                /* zr1 = zr0 * zr0 - zi0 * zi0 + cr */
                /* zi1 = zr0 * zi0 + zr0 * zi0 + ci */
                zr = vec_sub(zr2cr, zi2);
                zi = vec_add(vec_add(zrzi, zrzi), ci);

                /*
                 * Bump the per-lane counter mk only in lanes whose
                 * |z|^2 is still below the escape threshold.
                 */
                vector float zr2 = vec_madd(zr, zr, zero);
                vector float mag2 = vec_madd(zi, zi, zr2);
                vector bool int mask = vec_cmplt(mag2, threshold);
                mk = vec_add(mk, vec_and(one, mask));

                /*
                 * Stop once no lane is below the threshold. Lanes that
                 * escaped earlier can overflow to inf and then NaN; a NaN
                 * lane fails a ">=" test, which would keep the loop running
                 * to the iteration cap, but it also never satisfies
                 * "< threshold", so it contributes nothing further to mk.
                 */
                if (vec_all_nlt(mag2, threshold)) {
                    break;
                }
            }

            /* gray = sqrt(mk / iterations) * (depth - 1), sqrt via x * rsqrt(x). */
            mk = vec_madd(mk, iter_scale, zero);
            mk = vec_madd(vec_rsqrte(mk), mk, zero);
            mk = vec_madd(mk, depth_scale, zero);

            /*
             * Read each lane back as an int instead of peeking at a fixed
             * byte offset, so the low byte is taken on both big- and
             * little-endian targets.
             */
            union {
                vector int v;
                int lane[4];
            } pixels;
            pixels.v = vec_cts(mk, 0);

            unsigned char *dst = image + y * s->width * 3 + x * 3;

            /*
             * The width need not be a multiple of 4: clip the last group
             * so nothing is written past the end of the row.
             */
            for (int i = 0; i < 4 && x + i < s->width; i++) {
                unsigned char gray = (unsigned char)pixels.lane[i];

                dst[i * 3 + 0] = gray;
                dst[i * 3 + 1] = gray;
                dst[i * 3 + 2] = gray;
            }
        }
    }
}