Skip to content

Commit

Permalink
Distributed block-BMM
Browse files Browse the repository at this point in the history
  • Loading branch information
georgegito committed Sep 2, 2021
1 parent dd92047 commit f914704
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 114 deletions.
15 changes: 9 additions & 6 deletions include/block-bmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ void blockBmm(bcsr &A, bcsc &B, std::multimap<int, int> &C)
exit(1);
}

int blocksPerRow = A.m / A.b;
int numBlockRowsA = A.m / A.b;
int blocksPerRowA = A.n / A.b;
int numBlockRowsB = B.m / A.b;
int blocksPerRowB = B.n / A.b;

// high level matrix multiplication
for (int blockRowA = 0; blockRowA < blocksPerRow; blockRowA++) {
for (int blockColB = 0; blockColB < blocksPerRow; blockColB++) {
for (int blockRowA = 0; blockRowA < numBlockRowsA; blockRowA++) {
for (int blockColB = 0; blockColB < blocksPerRowB; blockColB++) {

std::multimap <int, int> _C; // block of matrix C

Expand All @@ -41,7 +44,7 @@ void blockRowColMult(int blockRowA, int blockColB, bcsr &A, bcsc &B, std::multim
int bIndA;
int bIndB;
int cN;
int blocksPerRow = A.m / A.b;
int blocksPerRowA = A.n / A.b;

int LL_rowPtrOffsetA, LL_colIndOffsetA;
int LL_colPtrOffsetB, LL_rowIndOffsetB;
Expand All @@ -56,8 +59,8 @@ void blockRowColMult(int blockRowA, int blockColB, bcsr &A, bcsc &B, std::multim
else {
cN = A.HL_bColInd[A.HL_bRowPtr[blockRowA] + ptr1]; // common neighbor index

bIndA = blockRowA * blocksPerRow + cN;
bIndB = blockColB * blocksPerRow + cN;
bIndA = blockRowA * blocksPerRowA + cN;
bIndB = blockColB * blocksPerRowA + cN;

util::blockOffsets(bIndA, A.nzBlockIndex, A.blockNnzCounter, A.b, LL_rowPtrOffsetA, LL_colIndOffsetA);
util::blockOffsets(bIndB, B.nzBlockIndex, B.blockNnzCounter, B.b, LL_colPtrOffsetB, LL_rowIndOffsetB);
Expand Down
118 changes: 61 additions & 57 deletions include/blocking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,34 @@

ret csr2bcsr(csr &M, bcsr &blM)
{
int numBlocks = (M.m / blM.b) * (M.n / blM.b), emptyBlocks = 0;

int numBlockRows = M.m / blM.b;
int blocksPerRow = M.n / blM.b;

int numBlocks = (M.m / blM.b) * (M.n / blM.b);
int emptyBlocks = 0;
int nnzb = 0;
int *blockNnzCounter = new int[numBlocks + 1]();
bool *isNotEmpty = new bool[numBlocks]();

for(int i = 0; i < M.m; i++)
for(int j = M.rowPtr[i]; j < M.rowPtr[i + 1]; j++)
blockNnzCounter[(i / blM.b) * (M.n / blM.b) + (M.colInd[j] / blM.b) + 1]++;
for (int i = 0; i < M.m; i++)
for (int j = M.rowPtr[i]; j < M.rowPtr[i + 1]; j++)
blockNnzCounter[(i / blM.b) * blocksPerRow + (M.colInd[j] / blM.b) + 1]++;

for(int i = 1; i < numBlocks + 1; i++) if (blockNnzCounter[i] == 0) emptyBlocks++;
for (int i = 1; i < numBlocks + 1; i++) {
if (blockNnzCounter[i] == 0) {
emptyBlocks++;
}
}

std::cout << "skata " << emptyBlocks << std::endl;
prt::arr(blockNnzCounter, numBlocks + 1);
for (int i = 0; i < M.m; i++)
for (int j = M.rowPtr[i]; j < M.rowPtr[i + 1]; j++)
isNotEmpty[(i / blM.b) * blocksPerRow + (M.colInd[j] / blM.b)] = true;

for (int i = 0; i < numBlocks; i++) {
if (isNotEmpty[i])
nnzb++;
}

int t = 0, t2 = 0, blkPtrSize = numBlocks - emptyBlocks + 1;
int *nzBlockIndex = new int[numBlocks];
Expand All @@ -41,7 +58,7 @@ ret csr2bcsr(csr &M, bcsr &blM)

for (int j = M.rowPtr[i]; j < M.rowPtr[i + 1]; j++) {

blockIdx = (i / blM.b) * (M.n / blM.b) + (M.colInd[j] / blM.b);
blockIdx = (i / blM.b) * blocksPerRow + (M.colInd[j] / blM.b);
colIndOffset = blockNnzCounter[blockIdx];
blM.LL_bColInd[colIndOffset + elementCounter[blockIdx]] = M.colInd[j] % blM.b;
elementCounter[blockIdx]++;
Expand All @@ -50,7 +67,7 @@ ret csr2bcsr(csr &M, bcsr &blM)
cnt++;
}

// delete[] elementCounter;
delete[] elementCounter;

int cumsum = 0;
for (int l = 0; l < blkPtrSize; l++) {
Expand All @@ -61,61 +78,34 @@ ret csr2bcsr(csr &M, bcsr &blM)
cumsum = 0;
}

// std::cout << "\nblockNnzCounter:";
// prt::arr(blockNnzCounter, numBlocks+1); //Non zeros of each block, thus externalBlockRowPtr
// std::cout << "nzBlockIndex:";
// prt::arr(nzBlockIndex, numBlocks); //Non zero block indices, can be transformed to BCSR with the offsets

/* -------------------------------------------------------------------------- */
/* TODO */
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/* pop nz blocks of blockNnzCounter */
/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */
/* Low-Level CSR */
/* -------------------------------------------------------------------------- */

// std::cout << "\nLow-Level CSR\n";
// std::cout << "LL-bRowPtr:\t";
// prt::arr(blM.LL_bRowPtr, blkPtrSize * (blM.b + 1)); //Inside blkRowPtr
// std::cout << "LL-bColInd:\t";
// prt::arr(blM.LL_bColInd, M.nnz);

/* -------------------------------------------------------------------------- */
/* B-COO */
/* -------------------------------------------------------------------------- */

int num_of_block_rows = M.m / blM.b;
int blocks_per_row = M.n / blM.b;
int nnzb = blkPtrSize;

int *b_rows = new int[nnzb];
int *b_cols = new int[nnzb];

for (int i = 0; i < nnzb; i++) {
b_rows[i] = nzBlockIndex2[i] / blocks_per_row;
b_cols[i] = nzBlockIndex2[i] % blocks_per_row;
b_rows[i] = nzBlockIndex2[i] / blocksPerRow;
b_cols[i] = nzBlockIndex2[i] % blocksPerRow;
}

// delete[] nzBlockIndex2;
std::cout << emptyBlocks <<" brows-bcols\n";
prt::arr(b_rows, nnzb);
prt::arr(b_cols, nnzb);
// prt::arr(b_rows, nnzb);
// prt::arr(b_cols, nnzb);

/* -------------------------------------------------------------------------- */
/* B-CSR */
/* -------------------------------------------------------------------------- */

int *HL_bRowPtr = new int[num_of_block_rows + 1];
int *HL_bRowPtr = new int[numBlockRows + 1];
int *HL_bColInd = new int[nnzb];

coo2csr(HL_bRowPtr, HL_bColInd, b_rows, b_cols, nnzb, num_of_block_rows, 0);
coo2csr(HL_bRowPtr, HL_bColInd, b_rows, b_cols, nnzb, numBlockRows, 0);

// // std::cout << "\nHigh-Level B-CSR\n";
// // std::cout << "HL-b_rowPtr:\t";
// // prt::arr(HL_bRowPtr, num_of_block_rows + 1);
// // prt::arr(HL_bRowPtr, numBlockRows + 1);
// // std::cout << "HL-b_colInd:\t";
// // prt::arr(HL_bColInd, nnzb);

Expand All @@ -124,21 +114,39 @@ ret csr2bcsr(csr &M, bcsr &blM)
// delete[] b_rows;
// delete[] b_cols;

ret _ret = {HL_bRowPtr, HL_bColInd, nzBlockIndex, blockNnzCounter, num_of_block_rows+1,
ret _ret = {HL_bRowPtr, HL_bColInd, nzBlockIndex, blockNnzCounter, numBlockRows+1,
nnzb, numBlocks, numBlocks+1};
return _ret;
}

ret csc2bcsc(csc &M, bcsc &blM)
{
int numBlockRows = M.m / blM.b;
int blocksPerRow = M.n / blM.b;

int numBlocks = (M.m / blM.b) * (M.n / blM.b), emptyBlocks = 0;
int *blockNnzCounter = new int[numBlocks + 1]();

int nnzb = 0;
bool *isNotEmpty = new bool[numBlocks]();

for(int i = 0; i < M.n; i++)
for(int j = M.colPtr[i]; j < M.colPtr[i + 1]; j++)
blockNnzCounter[(i / blM.b) * (M.n / blM.b) + (M.rowInd[j] / blM.b) + 1]++;
blockNnzCounter[(i / blM.b) * numBlockRows + (M.rowInd[j] / blM.b) + 1]++;

for(int i = 0; i < numBlocks; i++) if (blockNnzCounter[i] == 0) emptyBlocks++;
for(int i = 0; i < numBlocks; i++) {
if (blockNnzCounter[i] == 0) {
emptyBlocks++;
}
}

for (int i = 0; i < M.n; i++)
for (int j = M.colPtr[i]; j < M.colPtr[i + 1]; j++)
isNotEmpty[(i / blM.b) * numBlockRows + (M.rowInd[j] / blM.b)] = true;

for (int i = 0; i < numBlocks; i++) {
if (isNotEmpty[i])
nnzb++;
}

int t = 0, t2 = 0, blkPtrSize = numBlocks - emptyBlocks + 1;
int *nzBlockIndex = new int[numBlocks];
Expand Down Expand Up @@ -208,16 +216,12 @@ ret csc2bcsc(csc &M, bcsc &blM)
/* B-COO */
/* -------------------------------------------------------------------------- */

int num_of_block_rows = M.n / blM.b;
int blocks_per_row = num_of_block_rows;
int nnzb = blkPtrSize;

int *b_rows = new int[nnzb];
int *b_cols = new int[nnzb];

for (int i = 0; i < nnzb; i++) {
b_cols[i] = nzBlockIndex2[i] / blocks_per_row; // TODO check
b_rows[i] = nzBlockIndex2[i] % blocks_per_row;
b_cols[i] = nzBlockIndex2[i] / blocksPerRow; // TODO check
b_rows[i] = nzBlockIndex2[i] % blocksPerRow;
}

delete[] nzBlockIndex2;
Expand All @@ -229,14 +233,14 @@ ret csc2bcsc(csc &M, bcsc &blM)
/* B-CSC */
/* -------------------------------------------------------------------------- */

int *HL_bColPtr = new int[num_of_block_rows + 1];
int *HL_bColPtr = new int[numBlockRows + 1];
int *HL_bRowInd = new int[nnzb];

coo2csr(HL_bColPtr, HL_bRowInd, b_cols, b_rows, nnzb, num_of_block_rows, 0);
coo2csr(HL_bColPtr, HL_bRowInd, b_cols, b_rows, nnzb, numBlockRows, 0);

// std::cout << "\nHigh-Level B-CSC\n";
// std::cout << "HL-bColPtr:\t";
// prt::arr(HL_bColPtr, num_of_block_rows + 1);
// prt::arr(HL_bColPtr, numBlockRows + 1);
// std::cout << "HL-bRowInd:\t";
// prt::arr(HL_bRowInd, nnzb);

Expand All @@ -245,7 +249,7 @@ ret csc2bcsc(csc &M, bcsc &blM)
delete[] b_rows;
delete[] b_cols;

ret _ret = {HL_bColPtr, HL_bRowInd, nzBlockIndex, blockNnzCounter, num_of_block_rows+1,
ret _ret = {HL_bColPtr, HL_bRowInd, nzBlockIndex, blockNnzCounter, numBlockRows+1,
nnzb, numBlocks, numBlocks + 1};

return _ret;
Expand Down
17 changes: 11 additions & 6 deletions include/masked-block-bmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,15 @@ void maskedBlockBmm(bcsr &F, bcsr &A, bcsc &B, std::multimap<int, int> &C)
exit(1);
}

int blocksPerRow = A.m / A.b;
int numBlockRowsF = F.m / F.b;
int blocksPerRowF = F.n / F.b;
int numBlockRowsA = A.m / A.b;
int blocksPerRowA = A.n / A.b;
int numBlockRowsB = B.m / A.b;
int blocksPerRowB = B.n / A.b;

// high level matrix multiplication
for (int blockRowF = 0; blockRowF < blocksPerRow; blockRowF++) {
for (int blockRowF = 0; blockRowF < numBlockRowsF; blockRowF++) {
for (int indF = F.HL_bRowPtr[blockRowF]; indF < F.HL_bRowPtr[blockRowF + 1]; indF++) {

int blockColF = F.HL_bColInd[indF];
Expand All @@ -42,8 +47,8 @@ void maskedBlockRowColMult(int blockRowF, int blockColF, bcsr &F, bcsr &A, bcsc
int bIndA;
int bIndB;
int cN;
int blocksPerRow = A.m / A.b;
int bIndF = blockRowF * blocksPerRow + blockColF;
int blocksPerRowA = A.n / A.b;
int bIndF = blockRowF * blocksPerRowA + blockColF;

int LL_rowPtrOffsetF, LL_colIndOffsetF;
int LL_rowPtrOffsetA, LL_colIndOffsetA;
Expand All @@ -62,8 +67,8 @@ void maskedBlockRowColMult(int blockRowF, int blockColF, bcsr &F, bcsr &A, bcsc
else {
cN = A.HL_bColInd[A.HL_bRowPtr[blockRowA] + ptr1]; // common neighbor index

bIndA = blockRowA * blocksPerRow + cN;
bIndB = blockColB * blocksPerRow + cN;
bIndA = blockRowA * blocksPerRowA + cN;
bIndB = blockColB * blocksPerRowA + cN;

util::blockOffsets(bIndF, F.nzBlockIndex, F.blockNnzCounter, F.b, LL_rowPtrOffsetF, LL_colIndOffsetF);
util::blockOffsets(bIndA, A.nzBlockIndex, A.blockNnzCounter, A.b, LL_rowPtrOffsetA, LL_colIndOffsetA);
Expand Down
Loading

0 comments on commit f914704

Please sign in to comment.