Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NTT Evaluation #962

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,12 @@ let
pytorch = casesSelf.callPackage ./pytorch { };
disp = casesSelf.callPackage ./disp { };
emurt-test = casesSelf.callPackage ./emurt/tests { };
eval = casesSelf.callPackage ./eval { };
}));

# remove non-case attributes in scope
scopeStripped = {
inherit (scope) mlir intrinsic asm perf codegen rvv_bench pytorch disp emurt-test;
inherit (scope) mlir intrinsic asm perf codegen rvv_bench pytorch disp emurt-test eval;
};

# This derivation is for internal CI use only.
Expand Down
44 changes: 44 additions & 0 deletions tests/eval/_ntt/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# NTT evaluation cases.
#
# Every case links one vector kernel implementation (ntt.c or ntt_mem.c)
# together with a size-specific driver (ntt_<n>_main.c) and the shared t1main
# entry object, using the project linker script.
{ linkerScript
, makeBuilder
, t1main
}:

let
  builder = makeBuilder { casePrefix = "eval"; };

  # build_ntt <caseName> <kernel_src> <main_src>
  #   caseName   - must be consistent with the attribute name it is bound to
  #   kernel_src - C file providing the ntt() kernel
  #   main_src   - C file providing test() together with its input data
  build_ntt = caseName: kernel_src: main_src:
    builder {
      inherit caseName;

      src = ./.;

      passthru.featuresRequired = { };

      buildPhase = ''
        runHook preBuild

        $CC -T${linkerScript} \
          ${kernel_src} ${main_src} \
          ${t1main} \
          -o $pname.elf

        runHook postBuild
      '';

      # Use the concrete case name so the individual cases can be told apart.
      meta.description = "test case '${caseName}'";
    };

in
{
  # register-resident kernel
  ntt_64 = build_ntt "ntt_64" ./ntt.c ./ntt_64_main.c;
  ntt_128 = build_ntt "ntt_128" ./ntt.c ./ntt_128_main.c;
  ntt_256 = build_ntt "ntt_256" ./ntt.c ./ntt_256_main.c;
  ntt_512 = build_ntt "ntt_512" ./ntt.c ./ntt_512_main.c;
  ntt_1024 = build_ntt "ntt_1024" ./ntt.c ./ntt_1024_main.c;
  ntt_4096 = build_ntt "ntt_4096" ./ntt.c ./ntt_4096_main.c;

  # same drivers linked against the ntt_mem.c kernel
  ntt_mem_64 = build_ntt "ntt_mem_64" ./ntt_mem.c ./ntt_64_main.c;
  ntt_mem_128 = build_ntt "ntt_mem_128" ./ntt_mem.c ./ntt_128_main.c;
  ntt_mem_256 = build_ntt "ntt_mem_256" ./ntt_mem.c ./ntt_256_main.c;
  ntt_mem_512 = build_ntt "ntt_mem_512" ./ntt_mem.c ./ntt_512_main.c;
  ntt_mem_1024 = build_ntt "ntt_mem_1024" ./ntt_mem.c ./ntt_1024_main.c;
  ntt_mem_4096 = build_ntt "ntt_mem_4096" ./ntt_mem.c ./ntt_4096_main.c;
}
132 changes: 132 additions & 0 deletions tests/eval/_ntt/ntt.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include <assert.h>
#include <stdio.h>

// Number-theoretic transform over Z_p, written with RISC-V vector inline asm.
//
// array   : n = 2^l input coefficients (read only)
// l       : log2 of the transform size, at most 16
// twiddle : l consecutive layers of n/2 precomputed factors; layer k is read
//           from twiddle + k * (n / 2)
// p       : modulus, a prime number
// dst     : n output words; also used as scratch storage between layers
//
// NOTE(review): the factors are presumably powers of a root g with
// g^(2^l) == 1 mod p and g^(2^(l-1)) == -1 mod p -- confirm against the
// generator of the tables.
//
// All n 32-bit coefficients must fit in one LMUL=8 register group:
//   32bit * n <= VLEN * 8 => n <= VLEN / 4
//
// NOTE(review): vrem.vx is a signed remainder, so results appear to land in
// (-p, p) rather than [0, p) -- confirm that callers expect this.
void ntt(const int *array, int l, const int *twiddle, int p, int *dst) {
  // permutation indices are 16-bit (vrgatherei16), hence the bound on l
  assert(l <= 16);

  int n = 1 << l;

  // registers:
  //   v0-v3  : scratch while building the (0, 2, 4, ..., 1, 3, 5, ...) list
  //   v4-v7  : permutation index (e16, m4)
  //   v8-v15 : gathered coefficients; scratch while building indices
  //   v16-v23: loaded elements, later twiddle factors and butterfly results
  //
  // Vector register contents are deliberately carried from one asm statement
  // to the next, so the vector registers are not listed as clobbers.
  int vlenb;
  asm("csrr %0, vlenb" : "=r"(vlenb));
  // number of 32-bit elements in an LMUL=8 register group
  int elements_in_vreg = vlenb * 2;
  assert(elements_in_vreg >= n);

  asm("vsetvli zero, %0, e16, m4, tu, mu\n"
      "vid.v v4\n"
      :
      : "r"(n));

  // prepare the bit-reversal permutation list by swapping bit k with
  // bit (l-k-1) of every index
  for (int k = 0; 2 * k < l; k++) {
    asm("vand.vx v8, v4, %0\n"
        "vsub.vv v4, v4, v8\n"
        "vsll.vx v8, v8, %1\n"    // get the k-th digit and shift left

        "vand.vx v12, v4, %2\n"
        "vsub.vv v4, v4, v12\n"
        "vsrl.vx v12, v12, %1\n"  // get the (l-k-1)-th digit and shift right

        "vor.vv v4, v4, v8\n"
        "vor.vv v4, v4, v12\n"

        :
        : "r"(1 << k), "r"(l - 1 - 2 * k), "r"(1 << (l - k - 1)));
  }

  // perform bit-reversal for input coefficients
  // ("memory": the statement reads *array and writes *dst)
  asm("vsetvli zero, %0, e32, m8, tu, mu\n"
      "vle32.v v16, 0(%1)\n"
      "vrgatherei16.vv v8, v16, v4\n"
      "vse32.v v8, 0(%2)\n"

      :
      : "r"(n), "r"(array), "r"(dst)
      : "memory");

  // generate permutation list (0, 2, 4, ..., 1, 3, 5, ...)
  asm("vsetvli zero, %0, e16, m4, tu, mu\n"
      "vid.v v4\n"
      "vsrl.vx v0, v4, %1\n"  // (0, 0, 0, 0, ..., 1, 1, 1, 1, ...)
      "vand.vx v4, v4, %2\n"  // (0, 1, 2, 3, ..., 0, 1, 2, 3, ...)
      "vsll.vi v4, v4, 1\n"
      "vadd.vv v4, v4, v0\n"

      :
      : "r"(n), "r"(l - 1), "r"(n / 2 - 1));

#ifdef DEBUG
  int tmp1[USERN];  // first-half coefficients of the current layer
  int tmp2[USERN];  // second-half coefficients of the current layer
  int tmp3[USERN];  // twiddle factors of the current layer
#endif

  for (int k = 0; k < l; k++) {
    asm(
        // "n" mode
        "vsetvli zero, %0, e32, m8, tu, mu\n"
        // load coefficients
        "vle32.v v16, 0(%4)\n"
        // perform permutation for coefficient
        "vrgatherei16.vv v8, v16, v4\n"
        // save coefficients
        "vse32.v v8, 0(%4)\n"

        // "n/2" mode
        "vsetvli zero, %1, e32, m4, tu, mu\n"
        // load twiddle factors
        "vle32.v v16, 0(%2)\n"
        // load half coefficients
        "vle32.v v8, 0(%4)\n"
        "vle32.v v12, 0(%5)\n"

#ifdef DEBUG
        "vse32.v v8, 0(%6)\n"
        "vse32.v v12, 0(%7)\n"
        "vse32.v v16, 0(%8)\n"
#endif

        // butterfly operation
        "vmul.vv v12, v12, v16\n"
        "vrem.vx v12, v12, %3\n"
        "vadd.vv v16, v8, v12\n"  // NOTE: use lazy reduction here
        "vsub.vv v20, v8, v12\n"
        // save half coefficients
        "vse32.v v16, 0(%4)\n"
        "vse32.v v20, 0(%5)\n"
        :
        : /* %0 */ "r"(n),
          /* %1 */ "r"(n / 2),
          /* %2 */ "r"(twiddle + k * (n / 2)),
          /* %3 */ "r"(p),
          /* %4 */ "r"(dst),
          /* %5 */ "r"(dst + (n / 2))
#ifdef DEBUG
          ,
          /* %6 */ "r"(tmp1),
          /* %7 */ "r"(tmp2),
          /* %8 */ "r"(tmp3)
#endif
        // the statement reads and writes *dst, reads *twiddle and, under
        // DEBUG, writes tmp1..tmp3 which are read by printf right after
        : "memory");
#ifdef DEBUG
    for (int j = 0; j < USERN; j++) {
      printf("(%x, %x, %x)\n", tmp1[j], tmp2[j], tmp3[j]);
    }
#endif
  }

  // deal with modular: final reduction of the lazily reduced coefficients
  asm("vsetvli zero, %0, e32, m8, tu, mu\n"
      "vle32.v v16, 0(%1)\n"
      "vrem.vx v8, v16, %2\n"
      "vse32.v v8, 0(%1)\n"

      :
      : "r"(n), "r"(dst), "r"(p)
      : "memory");
}
161 changes: 161 additions & 0 deletions tests/eval/_ntt/ntt_1024_main.c

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions tests/eval/_ntt/ntt_128_main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// requires VLEN >= 512

#include <stdio.h>

void ntt(const int *array, int l, const int *twiddle, int p, int *dst);

// Runs ntt() once on a fixed 128-element input with modulus 12289.
// Nothing is verified here: the result is only printed, and only when DEBUG
// is defined.
void test() {
// log2 of the transform size, passed to ntt()
const int l = 7;
// element count; only read by the DEBUG print loop below
const int n = 128;
// input coefficients
static const int arr[128] = {
9997, 6362, 7134, 11711, 5849, 9491, 5972, 4164, 5894, 11069,
7697, 8319, 2077, 12086, 10239, 5394, 4898, 1370, 1205, 2997,
5274, 4625, 11983, 1789, 3645, 7666, 12128, 10883, 7376, 8883,
2321, 1889, 2026, 8059, 2741, 865, 1785, 9955, 2395, 9330,
11465, 7383, 9649, 11285, 3647, 578, 1158, 9936, 12019, 11114,
7894, 4832, 10148, 10363, 11388, 9122, 10758, 2642, 4171, 10586,
1194, 5280, 3055, 9220, 10577, 9046, 1284, 7915, 10213, 6902,
3777, 9896, 429, 7730, 7429, 8666, 10887, 11255, 2437, 7782,
1327, 7010, 4009, 1038, 9466, 5352, 1473, 10067, 11753, 2019,
8472, 7665, 2679, 5070, 2248, 3044, 10301, 10671, 2092, 1069,
9032, 9131, 11715, 6662, 3423, 10027, 5436, 4259, 999, 3316,
11164, 5597, 6578, 800, 8242, 6952, 2288, 1481, 6770, 11948,
8938, 10813, 11107, 1362, 4510, 9388, 8840, 10557};
// NOTE(review): looks like the list of roots the table below was expanded
// from, kept for reference -- confirm.
// const int twiddle[7] = {12149, 7311, 5860, 4134, 8246, 1479, 12288};
// precomputed factors handed to ntt(), one row per layer
static const int twiddle[] = {
// layer #0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

// layer #1
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479,

// layer #2
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146,

// layer #3
1, 1, 1, 1, 1, 1, 1, 1, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305,

// layer #4
1, 1, 1, 1, 5860, 5860, 5860, 5860, 4134, 4134, 4134, 4134, 3621, 3621, 3621, 3621, 8246, 8246, 8246, 8246, 1212, 1212, 1212, 1212, 11567, 11567, 11567, 11567, 8785, 8785, 8785, 8785, 1479, 1479, 1479, 1479, 3195, 3195, 3195, 3195, 6553, 6553, 6553, 6553, 9744, 9744, 9744, 9744, 5146, 5146, 5146, 5146, 10643, 10643, 10643, 10643, 1305, 1305, 1305, 1305, 3542, 3542, 3542, 3542,

// layer #5
1, 1, 7311, 7311, 5860, 5860, 3006, 3006, 4134, 4134, 5023, 5023, 3621, 3621, 2625, 2625, 8246, 8246, 8961, 8961, 1212, 1212, 563, 563, 11567, 11567, 5728, 5728, 8785, 8785, 4821, 4821, 1479, 1479, 10938, 10938, 3195, 3195, 9545, 9545, 6553, 6553, 6461, 6461, 9744, 9744, 11340, 11340, 5146, 5146, 5777, 5777, 10643, 10643, 9314, 9314, 1305, 1305, 4591, 4591, 3542, 3542, 2639, 2639,

// layer #6
1, 12149, 7311, 8736, 5860, 2963, 3006, 9275, 4134, 11112, 5023, 9542, 3621, 9198, 2625, 1170, 8246, 726, 8961, 11227, 1212, 2366, 563, 7203, 11567, 2768, 5728, 9154, 8785, 11289, 4821, 955, 1479, 1853, 10938, 4805, 3195, 7393, 9545, 3201, 6553, 4255, 6461, 4846, 9744, 12208, 11340, 9970, 5146, 4611, 5777, 2294, 10643, 9238, 9314, 10963, 1305, 1635, 4591, 8577, 3542, 7969, 2639, 11499,
};
// modulus
const int p = 12289;
// output buffer filled by ntt()
int dst[128];
ntt(arr, l, twiddle, p, dst);

#ifdef DEBUG
// dump the result, 8 values per line
for (int i = 0; i < n; i++) {
printf("%d", dst[i]);
if ((i + 1) % 8 == 0) {
printf("\n");
} else {
printf(" ");
}
}
#endif
}
77 changes: 77 additions & 0 deletions tests/eval/_ntt/ntt_256_main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// requires VLEN >= 1024

#include <stdio.h>

void ntt(const int *array, int l, const int *twiddle, int p, int *dst);

void test() {
const int l = 8;
const int n = 256;
static const int arr[256] = {
9997, 6362, 7134, 11711, 5849, 9491, 5972, 4164, 5894, 11069,
7697, 8319, 2077, 12086, 10239, 5394, 4898, 1370, 1205, 2997,
5274, 4625, 11983, 1789, 3645, 7666, 12128, 10883, 7376, 8883,
2321, 1889, 2026, 8059, 2741, 865, 1785, 9955, 2395, 9330,
11465, 7383, 9649, 11285, 3647, 578, 1158, 9936, 12019, 11114,
7894, 4832, 10148, 10363, 11388, 9122, 10758, 2642, 4171, 10586,
1194, 5280, 3055, 9220, 10577, 9046, 1284, 7915, 10213, 6902,
3777, 9896, 429, 7730, 7429, 8666, 10887, 11255, 2437, 7782,
1327, 7010, 4009, 1038, 9466, 5352, 1473, 10067, 11753, 2019,
8472, 7665, 2679, 5070, 2248, 3044, 10301, 10671, 2092, 1069,
9032, 9131, 11715, 6662, 3423, 10027, 5436, 4259, 999, 3316,
11164, 5597, 6578, 800, 8242, 6952, 2288, 1481, 6770, 11948,
8938, 10813, 11107, 1362, 4510, 9388, 8840, 10557, 6206, 7808,
7131, 1394, 2604, 1509, 689, 5222, 8867, 9934, 7165, 6099,
3229, 1263, 4414, 12212, 4963, 9236, 9040, 6062, 11163, 8169,
4575, 6097, 3006, 1, 1384, 12039, 5445, 11355, 12197, 9182,
10085, 9295, 8890, 10651, 1540, 9061, 10222, 2524, 2213, 6974,
2066, 7348, 7444, 173, 7529, 3884, 3531, 4312, 640, 5352,
5880, 3985, 781, 10165, 1106, 8114, 6043, 8202, 10617, 3060,
11173, 11521, 6933, 9540, 11782, 2284, 6462, 3740, 2581, 126,
508, 12165, 4956, 8045, 9379, 5250, 8148, 6539, 4891, 11252,
5041, 9969, 8524, 9892, 4058, 10580, 10025, 9748, 8829, 4438,
468, 4773, 1657, 1348, 10055, 7192, 9556, 5919, 5690, 6153,
6270, 4938, 6206, 1003, 596, 11173, 9858, 4825, 7940, 794,
7477, 10146, 7203, 4729, 5741, 4603, 1806, 7034, 8772, 10435,
10777, 1359, 630, 11059, 8005, 225};
// const int twiddle[8] = {8340, 12149, 7311, 5860, 4134, 8246, 1479, 12288};
static const int twiddle[] = {
// layer #0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

// layer #1
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479,

// layer #2
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146,

// layer #3
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305,

// layer #4
1, 1, 1, 1, 1, 1, 1, 1, 5860, 5860, 5860, 5860, 5860, 5860, 5860, 5860, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 4134, 3621, 3621, 3621, 3621, 3621, 3621, 3621, 3621, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 8246, 1212, 1212, 1212, 1212, 1212, 1212, 1212, 1212, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 11567, 8785, 8785, 8785, 8785, 8785, 8785, 8785, 8785, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 1479, 3195, 3195, 3195, 3195, 3195, 3195, 3195, 3195, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 6553, 9744, 9744, 9744, 9744, 9744, 9744, 9744, 9744, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 5146, 10643, 10643, 10643, 10643, 10643, 10643, 10643, 10643, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 1305, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542,

// layer #5
1, 1, 1, 1, 7311, 7311, 7311, 7311, 5860, 5860, 5860, 5860, 3006, 3006, 3006, 3006, 4134, 4134, 4134, 4134, 5023, 5023, 5023, 5023, 3621, 3621, 3621, 3621, 2625, 2625, 2625, 2625, 8246, 8246, 8246, 8246, 8961, 8961, 8961, 8961, 1212, 1212, 1212, 1212, 563, 563, 563, 563, 11567, 11567, 11567, 11567, 5728, 5728, 5728, 5728, 8785, 8785, 8785, 8785, 4821, 4821, 4821, 4821, 1479, 1479, 1479, 1479, 10938, 10938, 10938, 10938, 3195, 3195, 3195, 3195, 9545, 9545, 9545, 9545, 6553, 6553, 6553, 6553, 6461, 6461, 6461, 6461, 9744, 9744, 9744, 9744, 11340, 11340, 11340, 11340, 5146, 5146, 5146, 5146, 5777, 5777, 5777, 5777, 10643, 10643, 10643, 10643, 9314, 9314, 9314, 9314, 1305, 1305, 1305, 1305, 4591, 4591, 4591, 4591, 3542, 3542, 3542, 3542, 2639, 2639, 2639, 2639,

// layer #6
1, 1, 12149, 12149, 7311, 7311, 8736, 8736, 5860, 5860, 2963, 2963, 3006, 3006, 9275, 9275, 4134, 4134, 11112, 11112, 5023, 5023, 9542, 9542, 3621, 3621, 9198, 9198, 2625, 2625, 1170, 1170, 8246, 8246, 726, 726, 8961, 8961, 11227, 11227, 1212, 1212, 2366, 2366, 563, 563, 7203, 7203, 11567, 11567, 2768, 2768, 5728, 5728, 9154, 9154, 8785, 8785, 11289, 11289, 4821, 4821, 955, 955, 1479, 1479, 1853, 1853, 10938, 10938, 4805, 4805, 3195, 3195, 7393, 7393, 9545, 9545, 3201, 3201, 6553, 6553, 4255, 4255, 6461, 6461, 4846, 4846, 9744, 9744, 12208, 12208, 11340, 11340, 9970, 9970, 5146, 5146, 4611, 4611, 5777, 5777, 2294, 2294, 10643, 10643, 9238, 9238, 9314, 9314, 10963, 10963, 1305, 1305, 1635, 1635, 4591, 4591, 8577, 8577, 3542, 3542, 7969, 7969, 2639, 2639, 11499, 11499,

// layer #7
1, 8340, 12149, 12144, 7311, 8011, 8736, 9048, 5860, 11336, 2963, 10530, 3006, 480, 9275, 6534, 4134, 6915, 11112, 2731, 5023, 10908, 9542, 9005, 3621, 5067, 9198, 3382, 2625, 5791, 1170, 334, 8246, 2396, 726, 8652, 8961, 5331, 11227, 3289, 1212, 6522, 2366, 8595, 563, 1022, 7203, 4388, 11567, 130, 2768, 6378, 5728, 4177, 9154, 5092, 8785, 12171, 11289, 4231, 4821, 9821, 955, 1428, 1479, 8993, 1853, 6747, 10938, 1673, 4805, 11560, 3195, 3748, 7393, 3707, 9545, 9447, 3201, 4632, 6553, 2837, 4255, 8357, 6461, 9764, 4846, 9408, 9744, 10092, 12208, 355, 11340, 11745, 9970, 2426, 5146, 4452, 4611, 3459, 5777, 7300, 2294, 10276, 10643, 11462, 9238, 5179, 9314, 12280, 10963, 1260, 1305, 7935, 1635, 7399, 4591, 8705, 8577, 10200, 3542, 9813, 7969, 2548, 2639, 11950, 11499, 10593,
};
const int p = 12289;
int dst[256];
ntt(arr, l, twiddle, p, dst);

#ifdef DEBUG
for (int i = 0; i < n; i++) {
printf("%d", dst[i]);
if ((i + 1) % 8 == 0) {
printf("\n");
} else {
printf(" ");
}
}
#endif
}
Loading
Loading