From 26b6f00960c2f3c5895a70151d67eff8628ff159 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 17 Oct 2024 13:08:11 -0600 Subject: [PATCH 1/5] ShyLU - Basker : replace View-of-Views with std::vector-of-Views Signed-off-by: iyamazaki --- .../shylu_node/basker/src/shylubasker_def.hpp | 45 +-- .../basker/src/shylubasker_error_manager.hpp | 162 +++++------ .../basker/src/shylubasker_matrix_decl.hpp | 3 + .../basker/src/shylubasker_matrix_def.hpp | 9 +- .../basker/src/shylubasker_nfactor_blk.hpp | 144 +++++----- .../src/shylubasker_nfactor_blk_inc.hpp | 272 +++++++++--------- .../basker/src/shylubasker_nfactor_col.hpp | 266 ++++++++--------- .../basker/src/shylubasker_nfactor_col2.hpp | 84 +++--- .../src/shylubasker_nfactor_col_inc.hpp | 266 ++++++++--------- .../basker/src/shylubasker_nfactor_diag.hpp | 66 ++--- .../basker/src/shylubasker_order.hpp | 16 +- .../basker/src/shylubasker_sfactor.hpp | 189 +++++++----- .../basker/src/shylubasker_sfactor_inc.hpp | 52 ++-- .../basker/src/shylubasker_solve_rhs.hpp | 16 +- .../basker/src/shylubasker_solve_rhs_tr.hpp | 16 +- .../basker/src/shylubasker_structs.hpp | 7 +- .../basker/src/shylubasker_tree.hpp | 33 ++- .../basker/src/shylubasker_types.hpp | 158 +++++----- .../basker/src/shylubasker_util.hpp | 255 ++++++++-------- 19 files changed, 1089 insertions(+), 970 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp index c1b92347a094..c7b9d66311ab 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp @@ -92,16 +92,8 @@ namespace BaskerNS BASKER_INLINE void Basker::Finalize() { - //finalize all matrices - A.Finalize(); - At.Finalize(); //??? is At even used - BTF_A.Finalize(); - BTF_C.Finalize(); - BTF_B.Finalize(); - BTF_D.Finalize(); - BTF_E.Finalize(); - //finalize array of 2d matrics + // Actuall Finalize is called by desctructor FREE_MATRIX_2DARRAY(AVM, tree.nblks); FREE_MATRIX_2DARRAY(ALM, tree.nblks); @@ -120,7 +112,6 @@ namespace BaskerNS //Thread Array FREE_THREAD_1DARRAY(thread_array); - basker_barrier.Finalize(); //S (Check on this) FREE_INT_2DARRAY(S, tree.nblks); @@ -187,12 +178,6 @@ namespace BaskerNS FREE_ENTRY_1DARRAY(x_view_ptr_scale); FREE_ENTRY_1DARRAY(y_view_ptr_scale); - - //Structures - part_tree.Finalize(); - tree.Finalize(); - stree.Finalize(); - stats.Finalize(); }//end Finalize() @@ -239,7 +224,7 @@ namespace BaskerNS //Option = 2, BTF BASKER if(option == 1) - { + { default_order(); } else if(option == 2) @@ -475,12 +460,16 @@ namespace BaskerNS //Find BTF ordering if(btf_order2() != BASKER_SUCCESS) { + if(Options.verbose == BASKER_TRUE) + { + printf("Basker Ordering Failed \n"); fflush(stdout); + } return BASKER_ERROR; } if(Options.verbose == BASKER_TRUE) { - printf("Basker Ordering Found \n"); + printf("Basker Ordering Found \n"); fflush(stdout); } /*if((Options.btf == BASKER_TRUE) && (btf_tabs_offset != 0)) @@ -512,7 +501,7 @@ namespace BaskerNS if(symb_flag == BASKER_TRUE) { if(Options.verbose == BASKER_TRUE) { - printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); + printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); fflush(stdout); } return BASKER_ERROR; } @@ -547,7 +536,7 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf(" == Basker Symbolic Done ==\n\n"); + printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout); } #ifdef BASKER_TIMER @@ -1573,7 +1562,7 @@ namespace BaskerNS #endif } - // ---------------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------------- // 'sort' rows of BTF_A into ND structure #if 0 for (Int i = 0; i < BTF_A.nnz; ++i) { @@ -1621,6 +1610,7 @@ namespace BaskerNS symmetric_sfactor(); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for symbolic after ND on a big block A: " << nd_symbolic_timer.seconds() << std::endl; + fflush(stdout); } Kokkos::Timer nd_last_dense_timer; @@ -1628,16 +1618,23 @@ namespace BaskerNS btf_last_dense(flag); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for last-dense after ND on a big block A: " << nd_last_dense_timer.seconds() << std::endl; + fflush(stdout); } #ifdef BASKER_KOKKOS // ---------------------------------------------------------------------------------------------- // Allocate & Initialize blocks + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif /*kokkos_sfactor_init_workspace iWS(flag, this); @@ -1950,10 +1947,16 @@ namespace BaskerNS }*/ Kokkos::Timer nd_setup2_timer; +#ifdef BASKER_PARALLEL_INIT_WORKSPACE kokkos_sfactor_init_workspace iWS(flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); +#else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(flag, p); + } +#endif if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index a6e1f5c41e91..84cbb8b801b7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -46,66 +46,66 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; - } else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + } else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK SINGULAR: blk=" << thread_array(ti).error_blk + << " DOMBLK SINGULAR: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK NOMALLOC : blk=" << thread_array(ti).error_blk + << " DOMBLK NOMALLOC : blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_dom_error error_blk"); + BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_dom_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREAD: " << ti - << " DOMBLK MALLOC : blk=" << thread_array(ti).error_blk - << " subblk=" << thread_array(ti).error_subblk - << " newsize=" << thread_array(ti).error_info + << " DOMBLK MALLOC : blk=" << thread_array[ti].error_blk + << " subblk=" << thread_array[ti].error_subblk + << " newsize=" << thread_array[ti].error_info << std::endl; } //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array(ti).error_subblk != BASKER_MAX_IDX) + if(thread_array[ti].error_subblk != BASKER_MAX_IDX) { - BASKER_ASSERT(thread_array(ti).error_info > 0, "L) newsize not big enough"); - resize_L = thread_array(ti).error_info; + BASKER_ASSERT(thread_array[ti].error_info > 0, "L) newsize not big enough"); + resize_L = thread_array[ti].error_info; //if L is already bigger and U, //We will want re size U as, well - if(thread_array(ti).error_subblk == 0) + if(thread_array[ti].error_subblk == 0) { - Int blkcol = thread_array(ti).error_blk; + Int blkcol = thread_array[ti].error_blk; Int blkUrow = LU_size(blkcol)-1; - if(LL(blkcol)(0).nnz >= - LU(blkcol)(blkUrow).nnz) + if(LL[blkcol][0].nnz >= + LU[blkcol][blkUrow].nnz) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; } }//if - a domain } //We don't care about the other way since, //L is already checked before U. - if(thread_array(ti).error_subblk == -1) + if(thread_array[ti].error_subblk == -1) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; } //Resize L, if resize_L != -1 (meaning realloc-L is requested) @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(thread_array(ti).error_subblk); + LL[thread_array[ti].error_blk][thread_array[ti].error_subblk]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU(thread_array(ti).error_blk)(0); + LU[thread_array[ti].error_blk][0]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(0); + LL[thread_array[ti].error_blk][0]; L.clear_pend(); } @@ -163,11 +163,11 @@ namespace BaskerNS { //Clear workspace, whole column for(Int sb = 0; - sb < LL_size(thread_array(ti).error_blk); + sb < LL_size(thread_array[ti].error_blk); sb++) { BASKER_MATRIX &SL = - LL(thread_array(ti).error_blk)(sb); + LL[thread_array[ti].error_blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -198,13 +198,13 @@ namespace BaskerNS }//for - sb (subblks) }//if ws is filled - threads_start(ti) = thread_array(ti).error_blk; + threads_start(ti) = thread_array[ti].error_blk; //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC @@ -231,26 +231,26 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { thread_start(ti) = BASKER_MAX_IDX; continue; } - else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " SEPBLK SINGULAR: blk=" << thread_array(ti).error_blk + << " SEPBLK SINGULAR: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREADS: " << ti - << " SEPBLK NOMALLOC: blk=" << thread_array(ti).error_blk + << " SEPBLK NOMALLOC: blk=" << thread_array[ti].error_blk << std::endl; } return BASKER_ERROR; @@ -260,22 +260,22 @@ namespace BaskerNS Int error_sep_lvl = BASKER_MAX_IDX; for(Int l = 1; l < tree.nlvls+1; l++) { - if(thread_array(ti).error_blk == S(l)(ti)) + if(thread_array[ti].error_blk == S[l][ti]) { error_sep_lvl = l; break; } } - if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_SEP_error error_blk"); + BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_SEP_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti - << " SEPBLK MALLOC: blk=" << thread_array(ti).error_blk - << " subblk=" << thread_array(ti).error_subblk - << " newsize=" << thread_array(ti).error_info + << " SEPBLK MALLOC: blk=" << thread_array[ti].error_blk + << " subblk=" << thread_array[ti].error_subblk + << " newsize=" << thread_array[ti].error_info << std::endl; std::cout << " > SEPLVL: " << error_sep_lvl << std::endl; } @@ -283,9 +283,9 @@ namespace BaskerNS //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array(ti).error_subblk <= -1) + if(thread_array[ti].error_subblk <= -1) { - resize_L = thread_array(ti).error_info; + resize_L = thread_array[ti].error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ L size: " << resize_L << std::endl; @@ -293,9 +293,9 @@ namespace BaskerNS } //We don't care about the other way since, //L is already checked before U. - if(thread_array(ti).error_subblk > -1) + if(thread_array[ti].error_subblk > -1) { - resize_U = thread_array(ti).error_info; + resize_U = thread_array[ti].error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ U size: " << resize_U << std::endl; @@ -305,9 +305,9 @@ namespace BaskerNS //Resize L, if resize_L != -1 (meaning realloc-L is requested) if(resize_L != BASKER_MAX_IDX) { - const Int tsb = (-1*thread_array(ti).error_subblk)-1; + const Int tsb = (-1*thread_array[ti].error_subblk)-1; BASKER_MATRIX &L = - LL(thread_array(ti).error_blk)(tsb); + LL[thread_array[ti].error_blk][tsb]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -322,9 +322,9 @@ namespace BaskerNS //Resize U, if resize_U != -1 (meaning realloc-U is requested) if(resize_U != BASKER_MAX_IDX) { - const Int tsb = thread_array(ti).error_subblk; + const Int tsb = thread_array[ti].error_subblk; BASKER_MATRIX &U = - LU(thread_array(ti).error_blk)(tsb); + LU[thread_array[ti].error_blk][tsb]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -346,13 +346,13 @@ namespace BaskerNS //Though this could be done in parallel in the future for(Int p = 0; p < num_threads; p++) { - Int blk = S(0)(p); + Int blk = S[0][p]; //if(LL(blk)(0).w_fill == BASKER_TRUE) { //Clear workspace, whole column for(Int sb = 0; sb < LL_size(blk); sb++) { - BASKER_MATRIX &SL = LL(blk)(sb); + BASKER_MATRIX &SL = LL[blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -369,10 +369,10 @@ namespace BaskerNS Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A for(Int p = 0; p < num_threads; p++) { - Int blk = S(error_sep_lvl)(p); + Int blk = S[error_sep_lvl][p]; //if(LL(blk)(0).w_fill == BASKER_TRUE) { - BASKER_MATRIX &TM = LL(blk)(0); + BASKER_MATRIX &TM = LL[blk][0]; //printf( " > p=%d: scol_top = %d, scol = %d, ncol = %d\n",p,scol_top,TM.scol,TM.ncol ); for(Int i = scol_top + TM.scol; i < scol_top + (TM.scol+TM.ncol); i++) { @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL(thread_array(ti).error_blk)(0); + LL[thread_array[ti].error_blk][0]; //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -394,12 +394,12 @@ namespace BaskerNS gperm(i) = BASKER_MAX_IDX; }//for--to clear perm - thread_start(ti) = thread_array(ti).error_blk; + thread_start(ti) = thread_array[ti].error_blk; //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; for(Int i = 0; i < num_threads; i++) { @@ -451,9 +451,9 @@ namespace BaskerNS Int btab = btf_tabs_offset; for(Int ti = 0; ti < num_threads; ti++) { - Int c = thread_array(ti).error_blk; + Int c = thread_array[ti].error_blk; //Note: jdb we can make this into a switch - if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) + if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) { if (c >= btab) { thread_start(ti) = BASKER_MAX_IDX; @@ -463,7 +463,7 @@ namespace BaskerNS continue; }//end if NOERROR - if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) + if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { @@ -474,7 +474,7 @@ namespace BaskerNS return BASKER_ERROR; }//end if SINGULAR - if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) { std::cout << "ERROR_THREADS: " << ti << " DIAGBLK NOMALLOC blk=" << c @@ -482,16 +482,16 @@ namespace BaskerNS return BASKER_ERROR; }//end if NOMALLOC - if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) + if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) { - Int liwork = thread_array(ti).iws_size*thread_array(ti).iws_mult; - Int lework = thread_array(ti).ews_size*thread_array(ti).ews_mult; + Int liwork = thread_array[ti].iws_size*thread_array[ti].iws_mult; + Int lework = thread_array[ti].ews_size*thread_array[ti].ews_mult; BASKER_ASSERT(c >= 0, "nfactor_diag_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti << " DIAGBLK MALLOC blk=" << c - << " newsize=" << thread_array(ti).error_info + << " newsize=" << thread_array[ti].error_info << " for both L( " << c << " ) and U( " << c << " )" << std::endl; @@ -504,24 +504,24 @@ namespace BaskerNS for(Int i = 0; i < liwork; i++) { - thread_array(ti).iws(i) = (Int) 0; + thread_array[ti].iws(i) = (Int) 0; } for(Int i = 0; i < lework; i++) { - thread_array(ti).ews(i) = zero; + thread_array[ti].ews(i) = zero; } //Resize L - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, - thread_array(ti).error_info); + thread_array[ti].error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, - thread_array(ti).error_info); - L.mnnz = thread_array(ti).error_info; - L.nnz = thread_array(ti).error_info; + thread_array[ti].error_info); + L.mnnz = thread_array[ti].error_info; + L.nnz = thread_array[ti].error_info; for(Int i = 0; i < L.ncol; i++) { L.col_ptr(i) = 0; @@ -533,15 +533,15 @@ namespace BaskerNS } //Resize U - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, - thread_array(ti).error_info); + thread_array[ti].error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, - thread_array(ti).error_info); - U.mnnz = thread_array(ti).error_info; - U.nnz = thread_array(ti).error_info; + thread_array[ti].error_info); + U.mnnz = thread_array[ti].error_info; + U.nnz = thread_array[ti].error_info; for(Int i = 0; i < U.ncol; i++) { U.col_ptr(i) = 0; @@ -561,9 +561,9 @@ namespace BaskerNS } //Reset - thread_array(ti).error_type = BASKER_ERROR_NOERROR; - thread_array(ti).error_blk = BASKER_MAX_IDX; - thread_array(ti).error_info = BASKER_MAX_IDX; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_blk = BASKER_MAX_IDX; + thread_array[ti].error_info = BASKER_MAX_IDX; nthread_remalloc++; @@ -593,7 +593,7 @@ namespace BaskerNS { for(Int ti = 0; ti < num_threads; ti++) { - thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array[ti].error_type = BASKER_ERROR_NOERROR; } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp index 02a896d957c0..4bbd86507d9d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp @@ -95,6 +95,9 @@ namespace BaskerNS BASKER_INLINE int fill(); + BASKER_INLINE + void init_ptr(); + BASKER_INLINE void init_inc_lvl(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp index 4f12887c87ed..e40361e6f988 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp @@ -328,7 +328,7 @@ namespace BaskerNS if(nnz == _nnz) { copy_vec(_row_idx, _nnz, row_idx); - copy_vec(_val,_nnz, val); + copy_vec(_val, _nnz, val); } else { @@ -498,6 +498,13 @@ namespace BaskerNS return 0; } + template + BASKER_INLINE + void BaskerMatrix::init_ptr() + { + for (Int i = 0; i < ncol+1; i ++) col_ptr(i) = 0; + } + template BASKER_INLINE void BaskerMatrix::convert2D diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 499e00edd417..6613d992dbc2 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -150,14 +150,14 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL(b)(0); - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); - BASKER_MATRIX &M = ALM(b)(0); //A->blk + BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &M = ALM[b][0]; //A->blk #ifdef BASKER_2DL //printf("Accessing blk: %d kid: %d \n", b, kid); - INT_1DARRAY ws = LL(b)(0).iws; - ENTRY_1DARRAY X = LL(b)(0).ews; - Int ws_size = LL(b)(0).iws_size; + INT_1DARRAY ws = LL[b][0].iws; + ENTRY_1DARRAY X = LL[b][0].ews; + Int ws_size = LL[b][0].iws_size; #else //else if BASKER_2DL INT_1DARRAY ws = thread_array[kid].iws; ENTRY_1DARRAY X = thread_array[kid].ews; @@ -577,11 +577,11 @@ namespace BaskerNS } } if (!explicit_pivot) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; - thread_array(kid).error_info = k; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; + thread_array[kid].error_info = k; return BASKER_ERROR; } } @@ -676,17 +676,17 @@ namespace BaskerNS (int)kid, (long)b, (long)llnnz, (long)lnnz, (long)lcnt, (int)lnnz, (int)M.nrow, (long)newsize); } - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -701,17 +701,17 @@ namespace BaskerNS (int)kid, (long)b, (long)uunnz, (long)unnz+ucnt, (long)k, (int)uunnz, (int)M.nrow, (int)newsize); } - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = -1; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -981,10 +981,10 @@ namespace BaskerNS ) { //Setup variables - const Int wsb = S(0)(kid); + const Int wsb = S[0][kid]; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1011,18 +1011,18 @@ namespace BaskerNS ) { const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A - const Int b = S(lvl)(kid); + const Int b = S[lvl][kid]; //const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - const Int U_col = S(lvl)(kid); + BASKER_MATRIX &L = LL[b][0]; + const Int U_col = S[lvl][kid]; Int U_row = LU_size(U_col)-1; if(lvl > 0) { //U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); } - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int brow = L.srow; @@ -1128,14 +1128,14 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -1279,12 +1279,12 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; #else INT_1DARRAY ws = thread_array[kid].iws; Int ws_size = thread_array[kid].iws_size; @@ -1452,13 +1452,13 @@ namespace BaskerNS Int k, Int top, Int xnnz) { - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + ENTRY_1DARRAY X = LL[wsb][l].ews; + Int ws_size = LL[wsb][l].iws_size; #else INT_1DARRAY ws = thread_array[kid].iws; ENTRY_1DARRAY X = thread_array[kid].ews; @@ -1534,10 +1534,10 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; //const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; @@ -1608,7 +1608,7 @@ namespace BaskerNS #endif //LL[X_col][X_row].p_size = 0; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_dense_offdiag_mov_L() @@ -1623,12 +1623,12 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; - const Int p_size = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; + const Int p_size = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -1658,17 +1658,17 @@ namespace BaskerNS (long)blkcol, (long)blkrow, (long)kid, (long)llnnz, (long)lnnz, (long)p_size ); } - thread_array(kid).error_blk = blkcol; - thread_array(kid).error_subblk = blkrow; + thread_array[kid].error_blk = blkcol; + thread_array[kid].error_subblk = blkrow; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } //BASKER_ASSERT(0==1, "REALLOC LOWER BLOCK\n"); @@ -1714,7 +1714,7 @@ namespace BaskerNS } #endif - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_offdiag_mov_L() @@ -1733,17 +1733,17 @@ namespace BaskerNS BASKER_BOOL A_option) { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //printf( " t_dense_back_solve_offdiag( LL(%d,%d) and ALM(%d,%d)\n", blkcol,blkrow,blkcol,blkrow ); #ifdef BASKER_DEBUG_NFACTOR_BLK - Int ws_size = LL(X_col)(X_row).iws_size; + Int ws_size = LL[X_col][X_row].iws_size; const Int brow = L.srow; const Int bcol = L.scol; printf("\n\n"); @@ -1832,7 +1832,7 @@ namespace BaskerNS #ifdef BASKER_2DL //LL[X_col][X_row].p_size = nnz; - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif //Debug @@ -1878,14 +1878,14 @@ namespace BaskerNS { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int ws_size = LL[X_col][X_row].iws_size; + Int nnz = LL[X_col][X_row].p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -2057,7 +2057,7 @@ namespace BaskerNS printf("kid %d Ending nnz: %d \n",kid, nnz); #endif //LL[X_col][X_row].p_size = nnz; - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif //Debug diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 1fb5dc3fcc2b..48dae30f95c9 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -130,14 +130,14 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_inc_lvl(Int kid) { - Int b = S(0)(kid); //Which blk from schedule - BASKER_MATRIX &L = LL(b)(0); - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); - BASKER_MATRIX &M = ALM(b)(0); //A->blk + Int b = S[0][kid]; //Which blk from schedule + BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &M = ALM[b][0]; //A->blk - INT_1DARRAY ws = LL(b)(0).iws; - ENTRY_1DARRAY X = LL(b)(0).ews; - Int ws_size = LL(b)(0).iws_size; + INT_1DARRAY ws = LL[b][0].iws; + ENTRY_1DARRAY X = LL[b][0].ews; + Int ws_size = LL[b][0].iws_size; Int brow = L.srow; //begining row Int lval = 0; @@ -384,10 +384,10 @@ namespace BaskerNS << pivot << endl; cout << "lcnt: " << lcnt << endl; } - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = b; - thread_array(kid).error_info = k; + thread_array[kid].error_blk = b; + thread_array[kid].error_info = k; return BASKER_ERROR; } @@ -410,17 +410,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = 0; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = 0; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -441,17 +441,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = b; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = b; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -665,13 +665,13 @@ namespace BaskerNS { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int brow = L.srow; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -936,12 +936,12 @@ namespace BaskerNS ) { //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -985,13 +985,13 @@ namespace BaskerNS //Will want to make this backward in the future //Setup variables - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; const Int brow = L.srow; - INT_1DARRAY ws = LL(wsb)(l).iws; - const Int ws_size = LL(wsb)(l).iws_size; + INT_1DARRAY ws = LL[wsb][l].iws; + const Int ws_size = LL[wsb][l].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1353,12 +1353,12 @@ namespace BaskerNS //We note that this can be fixed to be faster - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - const Int ws_size = LL(wsb)(l).iws_size; + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; + INT_1DARRAY ws = LL[wsb][l].iws; + ENTRY_1DARRAY X = LL[wsb][l].ews; + const Int ws_size = LL[wsb][l].iws_size; Int brow = L.srow; @@ -1441,12 +1441,12 @@ namespace BaskerNS { //We note that this can be fixed to be faster - const Int b = S(lvl)(kid); - const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL(wsb)(l).iws; - ENTRY_1DARRAY X = LL(wsb)(l).ews; - const Int ws_size = LL(wsb)(l).iws_size; + const Int b = S[lvl][kid]; + const Int wsb = S[0][kid]; + BASKER_MATRIX &L = LL[b][0]; + INT_1DARRAY ws = LL[wsb][l].iws; + ENTRY_1DARRAY X = LL[wsb][l].ews; + const Int ws_size = LL[wsb][l].iws_size; Int brow = L.srow; Int *color = &(ws(0)); @@ -1555,14 +1555,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK printf("t_back_solve_diag, kid: %d blkcol: %d blkrow: %d \n", @@ -1696,7 +1696,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif return; @@ -1717,14 +1717,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //Int brow = L.srow; //Int bcol = L.scol; @@ -1869,14 +1869,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; Int brow = L.srow; Int bcol = L.scol; @@ -2065,12 +2065,12 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; - const Int p_size = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; + const Int p_size = LL[X_col][X_row].p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -2105,18 +2105,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = blkcol; - thread_array(kid).error_subblk = blkrow; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = blkcol; + thread_array[kid].error_subblk = blkrow; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } @@ -2155,14 +2155,14 @@ namespace BaskerNS //Fix later if(Options.same_pattern == BASKER_FALSE) { - for(Int i = 0; i < LL(X_col)(X_row).nrow; i++) + for(Int i = 0; i < LL[X_col][X_row].nrow; i++) { stack[i] = BASKER_MAX_IDX; } } L.col_ptr(k+1) = lnnz; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_offdiag_mov_L_inc_lvl() @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; /* @@ -2740,27 +2740,27 @@ namespace BaskerNS LP_col, LP_row, kid); */ - BASKER_MATRIX *UPP = &LU(UP_col)(0); + BASKER_MATRIX *UPP = &LU[UP_col][0]; if(UP_row != BASKER_MAX_IDX) { - UPP = &(LU(UP_col)(UP_row)); + UPP = &(LU[UP_col][UP_row]); } BASKER_MATRIX &UP = *(UPP); - BASKER_MATRIX *LPP = &LU(LP_col)(0); + BASKER_MATRIX *LPP = &LU[LP_col][0]; if(LP_row != BASKER_MAX_IDX) { - LPP = &(LL(LP_col)(LP_row)); + LPP = &(LL[LP_col][LP_row]); } BASKER_MATRIX &LP = *(LPP); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; @@ -2948,7 +2948,7 @@ namespace BaskerNS }//over all nonzero in left - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; return; @@ -2969,14 +2969,14 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL(X_col)(X_row).p_size; + Int nnz = LL[X_col][X_row].p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -3106,7 +3106,7 @@ namespace BaskerNS */ - Int temp = INC_LVL_TEMP(k_i+LL(blkcol)(0).srow) + L.inc_lvl(j) + 1; + Int temp = INC_LVL_TEMP(k_i+LL[blkcol][0].srow) + L.inc_lvl(j) + 1; /* printf("lower row: %d kid: %d inc: %d %d %d j: %d \n", @@ -3183,7 +3183,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL(X_col)(X_row).p_size = nnz; + LL[X_col][X_row].p_size = nnz; #endif //Debug @@ -3219,11 +3219,11 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - const Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + const Int ws_size = LL[X_col][X_row].iws_size; //const Int p_size = LL(X_col)(X_row).p_size; //NDE - warning: unused @@ -3296,7 +3296,7 @@ namespace BaskerNS } L.col_ptr(k+1) = lnnz; - LL(X_col)(X_row).p_size = 0; + LL[X_col][X_row].p_size = 0; return 0; }//end t_dense_offdiag_mov_L_inv_lvl() @@ -3315,12 +3315,12 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &B = ALM[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //Int brow = L.srow; @@ -3439,11 +3439,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3576,11 +3576,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL[blkcol][blkrow]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; + Int ws_size = LL[X_col][X_row].iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3758,16 +3758,16 @@ namespace BaskerNS BASKER_MATRIX *B; if(lower == BASKER_TRUE) { - B = &(ALM(blkcol)(blkrow)); + B = &(ALM[blkcol][blkrow]); } else { - B = &(AVM(blkcol)(blkrow)); + B = &(AVM[blkcol][blkrow]); } BASKER_MATRIX &M = *B; //BASKER_MATRIX &M = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -3840,9 +3840,9 @@ namespace BaskerNS ) { - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; const Int team_leader = find_leader(kid,sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //Int loop_col_idx = S(l)(kid); //printf("Reduce col fill called, kid: %d leader: %d \n", @@ -3857,12 +3857,12 @@ namespace BaskerNS for(Int blk = l+1; blk < endblk; ++blk) { // ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; + INT_1DARRAY &wsL = LL[leader_idx][blk].iws; //Int p_sizeL = LL(leader_idx)(blk).p_size; - Int ws_sizeL = LL(leader_idx)(blk).iws_size; + Int ws_sizeL = LL[leader_idx][blk].iws_size; // ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &ws = LL(my_idx)(blk).iws; - const Int ws_size = LL(my_idx)(blk).iws_size; + INT_1DARRAY &ws = LL[my_idx][blk].iws; + const Int ws_size = LL[my_idx][blk].iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -3875,7 +3875,7 @@ namespace BaskerNS Int *stackL = &(patternL[ws_sizeL]); //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { //if(kid==3) // { @@ -3941,12 +3941,12 @@ namespace BaskerNS //printf("===========T ADD ORIG FILL CALLED\n"); const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; //Int X_col = S(0)(leader_id); - Int X_col = S(0)(kid); + Int X_col = S[0][kid]; Int X_row = l+1; @@ -3977,7 +3977,7 @@ namespace BaskerNS //Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; //printf("=***== fill MY ID: %d LEADER ID: %d ===** \n", @@ -3987,7 +3987,7 @@ namespace BaskerNS { Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; /* printf("leader_id: %d kid: %d lvl: %d l: %d blk: %d %d \n", @@ -3996,16 +3996,16 @@ namespace BaskerNS */ Int my_row_leader = find_leader(kid, lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; - Int A_row = (lvl==l)?(2):S(bl)(kid)%(LU_size(A_col)); - if((S(bl)(kid)>14) && - (S(bl)(kid)>LU_size(A_col)) && + Int A_row = (lvl==l)?(2):S[bl][kid]%(LU_size(A_col)); + if((S[bl](kid)>14) && + (S[bl](kid)>LU_size(A_col)) && (lvl != 1)) { - Int tm = (S(bl)(kid)+1)/16; - A_row = ((S(bl)(kid)+1)-(tm*16))%LU_size(A_col); + Int tm = (S[bl][kid]+1)/16; + A_row = ((S[bl][kid]+1)-(tm*16))%LU_size(A_col); } /* diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 650bc77a8de6..9c77c1f38994 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -134,12 +134,12 @@ namespace BaskerNS double barrier_time = 0; #endif - Int U_col = S(lvl)(kid); + Int U_col = S[lvl][kid]; Int U_row = 0; - const Int scol = LU(U_col)(U_row).scol; - const Int ecol = LU(U_col)(U_row).ecol; - const Int ncol = LU(U_col)(U_row).ncol; + const Int scol = LU[U_col][U_row].scol; + const Int ecol = LU[U_col][U_row].ecol; + const Int ncol = LU[U_col][U_row].ncol; //for(Int k = scol; k < ecol; k++) //might have to use k+scol for barrier @@ -460,15 +460,15 @@ namespace BaskerNS const Entry zero (0.0); //Get needed variables - const Int L_col = S(l)(kid); - const Int U_col = S(lvl)(kid); + const Int L_col = S[l][kid]; + const Int U_col = S[lvl][kid]; - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; //Int my_new_row = // L_col - my_row_leader; Int U_row = L_col - my_row_leader; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -488,7 +488,7 @@ namespace BaskerNS //if(sep_flg == BASKER_FALSE) if(l == 0) { - Bp = &(AVM(U_col)(U_row)); + Bp = &(AVM[U_col][U_row]); //bbcol = Bp->scol; } else @@ -503,9 +503,9 @@ namespace BaskerNS // kid, X_col, X_row); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -649,17 +649,17 @@ namespace BaskerNS Int newsize = (unnz+U.nrow) * 1.2 ; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; }//if/else realloc } @@ -741,10 +741,10 @@ namespace BaskerNS std::cout << "----Error--- kid = " << kid << ": extra L[" << j << "]=" << X[j] << " with gperm( " << brow_g << " + " << j << " ) = " << t << std::endl; - thread_array(kid).error_type = BASKER_ERROR_OTHER; - thread_array(kid).error_blk = lvl; - thread_array(kid).error_subblk = l; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_OTHER; + thread_array[kid].error_blk = lvl; + thread_array[kid].error_subblk = l; + thread_array[kid].error_info = k; info = BASKER_ERROR; //BASKER_ASSERT(t != BASKER_MAX_IDX, "lower entry in U"); #endif @@ -864,14 +864,14 @@ namespace BaskerNS int lteam_size = pow(2, l); #ifdef BASKER_2DL - Int L_col = S(l)(my_leader); + Int L_col = S[l][my_leader]; Int L_row = 0; - Int U_col = S(lvl)(kid); - Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); - Int X_col = S(0)(my_leader); + Int U_col = S[lvl][kid]; + Int U_row = (lvl==1)?(kid%2):S[l][kid]%LU_size(U_col); + Int X_col = S[0][my_leader]; Int X_row = l; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; const Int bcol = U.scol; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only work with with 2D layout"); @@ -1066,11 +1066,11 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; //Get needed variables - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = 0; - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; const Int U_row = LU_size(U_col)-1; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; //Int col_idx_offset = 0; //can we get rid of now? #ifdef BASKER_DEBUG_NFACTOR_COL @@ -1080,10 +1080,10 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array(kid).C; + BASKER_MATRIX &B = thread_array[kid].C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1098,9 +1098,9 @@ namespace BaskerNS //B.print(); - INT_1DARRAY ws = LL(X_col)(l+1).iws; - const Int ws_size = LL(X_col)(l+1).iws_size; - ENTRY_1DARRAY X = LL(X_col)(l+1).ews; + INT_1DARRAY ws = LL[X_col][l+1].iws; + const Int ws_size = LL[X_col][l+1].iws_size; + ENTRY_1DARRAY X = LL[X_col][l+1].ews; Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -1327,10 +1327,10 @@ namespace BaskerNS X(maxindex) = pivot; } else { // replace-tiny-pivot not requested, or the current column is structurally empty after elimination - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = k; return BASKER_ERROR; } } else if (Options.replace_tiny_pivot && normA_blk > abs(zero) && abs(pivot) < normA_blk * sqrt(eps)) { @@ -1374,17 +1374,17 @@ namespace BaskerNS //cout << " > L_col = " << L_col << " L_row = " << L_row << endl; } - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -1399,17 +1399,17 @@ namespace BaskerNS << endl; } - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -1640,20 +1640,20 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; Int L_row = 0; - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); //U.fill(); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; //U.fill(); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int bcol = U.scol; @@ -1743,15 +1743,15 @@ namespace BaskerNS //Setup - Int A_col = S(lvl)(kid); - Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); + Int A_col = S[lvl][kid]; + Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); - BASKER_MATRIX &B = AVM(A_col)(A_col); + BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; team_leader = find_leader(kid, l); - const Int leader_idx = S(0)(team_leader); - Int loop_col_idx = S(l)(kid); + const Int leader_idx = S[0][team_leader]; + Int loop_col_idx = S[l][kid]; #ifdef BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -1769,17 +1769,17 @@ namespace BaskerNS //Split over threads (leader and nonleader) for(Int blk=l+1; blk Accumulate the update from (l-1)th level: // LU(U_col)(U_row) -= L(U_col)(l-1) * U(l-1)(U_row) t_add_extend(thread, kid, lvl, l-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_FALSE); if(kid%((Int)pow(2, l)) == 0) @@ -248,9 +248,9 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier(thread, kid, my_leader, - b_size, 3, LU(U_col)(U_row).scol, 0); + b_size, 3, LU[U_col][U_row].scol, 0); for(Int ti = 0; ti < num_threads; ti++) { - if (thread_array(kid).error_type != BASKER_SUCCESS) { + if (thread_array[kid].error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -287,7 +287,7 @@ namespace BaskerNS printf( " kid=%d: calling t_add_extend(k=%d/%d)\n",kid,k,ncol ); fflush(stdout); #endif t_add_extend(thread, kid,lvl,lvl-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_TRUE); } #ifdef BASKER_TIMER @@ -336,7 +336,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 4, k, lvl-1); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array(tid).error_type != BASKER_SUCCESS) { + if (thread_array[tid].error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -395,7 +395,7 @@ namespace BaskerNS #ifdef BASKER_TIMER double time_factot = timer.seconds(); if((kid%(Int)(pow(2,lvl))) == 0) { - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = LU_size(U_col)-1; printf("Time Lower-Col(%d): %lf, n = %d, nnz(L) = %d, nnz(U) = %d \n", (int)kid, time_factot, @@ -446,7 +446,7 @@ namespace BaskerNS #endif //This will do the correct spmv - if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_upper_col_factor_offdiag2(kid, lvl, sl,l, k, lower); } //Barrier--Start @@ -461,7 +461,7 @@ namespace BaskerNS //Barrier--End if(kid%((Int)pow(2,sl)) == 0 && - thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_dense_blk_col_copy_atomic2(kid, my_leader, lvl, sl, l, k, lower); } @@ -477,7 +477,7 @@ namespace BaskerNS #endif }//over all sublevels - if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { + if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { t_dense_copy_update_matrix2(kid, my_leader, lvl, l, k); } }//end t_add_add @@ -507,15 +507,15 @@ namespace BaskerNS return; } - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); - const Int L_col = S(sl)(my_leader); - const Int U_col = S(lvl)(kid); - const Int X_col = S(0)(my_leader); + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + const Int L_col = S[sl][my_leader]; + const Int U_col = S[lvl][kid]; + const Int X_col = S[0][my_leader]; Int L_row = l-sl+1; //Might have to think about th Int U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; #ifdef BASKER_DEBUG_NFACTOR_COL2 if(L_row >= LL_size(L_col)) { @@ -588,10 +588,10 @@ namespace BaskerNS //Setup //printf("DEBUG, kid: %d k: %d A_col: %d A_row: %d \n", // kid, k, A_col, A_row); - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; #ifdef BASKER_DEBUG_NFACTOR_COL2 if(lower == BASKER_TRUE) { @@ -609,10 +609,10 @@ namespace BaskerNS Int endblk = (lower)?(LL_size(my_idx)):(l+2); for(Int blk = l+1; blk < endblk; ++blk) { - ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; - Int p_sizeL = LL(leader_idx)(blk).p_size; - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + Int p_sizeL = LL[leader_idx][blk].p_size; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; Int *color = &(ws[0]); //printf( " + t_dense_blk_col_copy_atomic2(kid=%d: LL(%d)(%d) += LL(%d)(%d)\n",kid,leader_idx, blk,my_idx,blk); @@ -629,7 +629,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { color[jj] = 0; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -677,7 +677,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; } else { @@ -685,7 +685,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL(leader_idx)(blk).p_size = p_sizeL; + LL[leader_idx][blk].p_size = p_sizeL; //p_size = 0; //not needed }//over all blks } @@ -709,8 +709,8 @@ namespace BaskerNS //printf("\n\n\n\n"); const Entry zero (0.0); - const Int leader_idx = S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; //Over each blk @@ -724,10 +724,10 @@ namespace BaskerNS // X += B(:, k) { Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; - Int my_row_leader = S(0)(find_leader(kid,lvl-1)); - Int A_row = S(bl)(kid) - my_row_leader; + Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + Int A_row = S[bl][kid] - my_row_leader; BASKER_MATRIX *Bp; if(A_row != (LU_size(A_col)-1)) @@ -735,12 +735,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("copy, kid: %d bl: %d A: %d %d \n", @@ -749,7 +749,7 @@ namespace BaskerNS // X += B(:, k) BASKER_MATRIX &B = *Bp; - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; //printf( " -- t_dense_copy_update_matrix2(kid=%d: LL(%d)(%d) += B)\n",kid,leader_idx,bl ); //printf("ADDING UPDATES TO B\n"); //B.info(); @@ -800,9 +800,9 @@ namespace BaskerNS //For recounting patterns in dense blk //Need better sparse update - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; - const Int nrow = LL(leader_idx)(bl).nrow; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; + const Int nrow = LL[leader_idx][bl].nrow; Int *color = &(ws(0)); #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("moving, kid: %d A: %d %d %d %d p_size: %d \n", @@ -875,18 +875,18 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); - const Int U_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; + const Int U_col = S[lvl][leader_id]; Int L_row = 0; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 1425385d9f2e..ee72c5d32c7b 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -83,7 +83,7 @@ namespace BaskerNS ) { - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int U_row = 0; //const Int scol = LU(U_col)(U_row).scol; @@ -101,7 +101,7 @@ namespace BaskerNS //for(Int k = 0; k < 1; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -148,7 +148,7 @@ namespace BaskerNS //barrier k = 0 usedl1 t_basker_barrier_inc_lvl(thread,kid,my_leader, - b_size, 0, LU(U_col)(U_row).scol, 0); + b_size, 0, LU[U_col][U_row].scol, 0); //printf("1 kid: %d error_leader: %d lvl: %d \n", kid, error_leader, lvl); BASKER_BOOL error_flag = BASKER_FALSE; basker_barrier.ExitGet(error_leader, error_flag); @@ -172,7 +172,7 @@ namespace BaskerNS { //for(Int k = 2; k < 3; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -181,7 +181,7 @@ namespace BaskerNS #endif t_add_extend_inc_lvl(thread, kid,lvl,l-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_FALSE); //where to start again @@ -234,7 +234,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier_inc_lvl(thread, kid, my_leader, - b_size, 7, LU(U_col)(U_row).scol, 0); + b_size, 7, LU[U_col][U_row].scol, 0); #ifdef BASKER_DEBUG_NFACTOR_COL_INC if(kid == 0) @@ -248,7 +248,7 @@ namespace BaskerNS //if(lvl < 2) { //for(Int k=0; k < 1; ++k) - for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) + for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -259,7 +259,7 @@ namespace BaskerNS //printf("test: %d \n", LU(U_col)(U_row).scol); t_add_extend_inc_lvl(thread, kid,lvl,lvl-1, k, - LU(U_col)(U_row).scol, + LU[U_col][U_row].scol, BASKER_TRUE); Entry pivot = 0; if((kid%(Int)(pow(2,lvl))) == 0) @@ -577,12 +577,12 @@ namespace BaskerNS ) { l = l+1; - Int my_token = S(l)(kid); + Int my_token = S[l][kid]; Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S(l)(my_loc) != my_token) + if(S[l][my_loc] != my_token) { my_loc++; break; @@ -615,14 +615,14 @@ namespace BaskerNS //Get needed variables - const Int L_col = S(l)(kid); + const Int L_col = S[l][kid]; // const Int L_row = 0; //NDE - warning: unused - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); //Int my_new_row = // L_col - S(0)(my_row_leader); - Int U_row = L_col - S(0)(my_row_leader); + Int U_row = L_col - S[0][my_row_leader]; /* Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); @@ -642,7 +642,7 @@ namespace BaskerNS //U_row = my_new_row; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -654,13 +654,13 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Ask C++ guru if this is ok BASKER_MATRIX *Bp; if(l == 0) { - Bp = &(AVM(U_col)(U_row)); + Bp = &(AVM[U_col][U_row]); } else { @@ -674,9 +674,9 @@ namespace BaskerNS // } //B.print(); - INT_1DARRAY ws = LL(X_col)(X_row).iws; - const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + INT_1DARRAY ws = LL[X_col][X_row].iws; + const Int ws_size = LL[X_col][X_row].iws_size; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -920,18 +920,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { //printf("HERE\n"); - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; + thread_array[kid].error_info = newsize; return BASKER_ERROR; }//if/else realloc }//if need to realloc @@ -1086,26 +1086,26 @@ namespace BaskerNS // kid, lvl, sl, l); } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); - if((S(sl)(kid) > 14) && - (S(sl)(kid) > LU_size(U_col)) && + (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); + if((S[sl][kid] > 14) && + (S[sl][kid] > LU_size(U_col)) && (lvl != 1)) { //printf("lower offdiag new num, %d %d \n", // S(sl)(kid), LU_size(U_col)); - Int tm = (S(sl)(kid)+1)/16; - U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); + Int tm = (S[sl][kid]+1)/16; + U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); } //printf("UFF kid:%d U: %d %d new: %d leader: %d %d lvl: %d l: %d sl: %d \n", @@ -1116,12 +1116,12 @@ namespace BaskerNS //JDB PASS TEST U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us //Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1256,31 +1256,31 @@ namespace BaskerNS return; } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; Int U_row = 0; U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //Need to give them the output pattern - Int U_pattern_col = S(lvl)(kid); + Int U_pattern_col = S[lvl][kid]; Int my_pattern_leader = find_leader_inc_lvl(kid,l); - Int U_pattern_row = S(l+1)(my_pattern_leader) - - S(0)(my_row_leader); + Int U_pattern_row = S[l+1][my_pattern_leader] - + S[0][my_row_leader]; /* printf("Test mypleader: %d myrowleader: %d kid: %d\n", @@ -1292,7 +1292,7 @@ namespace BaskerNS */ - Int L_pattern_col = S(lvl)(kid); + Int L_pattern_col = S[lvl][kid]; Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -1418,26 +1418,26 @@ namespace BaskerNS return; } - const Int L_col = S(sl)(my_leader); + const Int L_col = S[sl][my_leader]; Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S(0)(my_row_leader); + L_col - S[0][my_row_leader]; // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); - if((S(sl)(kid) > 14) && - (S(sl)(kid) > LU_size(U_col)) && + (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); + if((S[sl][kid] > 14) && + (S[sl][kid] > LU_size(U_col)) && (lvl != 1)) { - Int tm = (S(sl)(kid)+1)/16; - U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); + Int tm = (S[sl][kid]+1)/16; + U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); } // printf("lowerspmv kid: %d U: %d %d new %d leader: %d %d lvl: %d %d %d \n", @@ -1448,12 +1448,12 @@ namespace BaskerNS U_row = my_new_row; - const Int X_col = S(0)(my_leader); + const Int X_col = S[0][my_leader]; Int X_row = l+1; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1538,8 +1538,8 @@ namespace BaskerNS ) { - const Int leader_idx = S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; // Int gbrow = 0; //NDE - warning: unused @@ -1549,11 +1549,11 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; Int A_row = 0; A_row = my_new_row; @@ -1564,12 +1564,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1580,10 +1580,10 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; Int *color = &(ws(0)); - LL(leader_idx)(bl).p_size = 0; + LL[leader_idx][bl].p_size = 0; //Get the columns pattern Int U_pattern_col = A_col; @@ -1606,7 +1606,7 @@ namespace BaskerNS //Copy into C - BASKER_MATRIX &Up = LU(U_pattern_col)(U_pattern_row); + BASKER_MATRIX &Up = LU[U_pattern_col][U_pattern_row]; for(Int i = Up.col_ptr(k); i < Up.col_ptr(k+1); i++) { const Int j = Up.row_idx(i); @@ -1620,7 +1620,7 @@ namespace BaskerNS //if there is a L if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &Lp = LL(L_pattern_col)(L_pattern_row); + BASKER_MATRIX &Lp = LL[L_pattern_col][L_pattern_row]; for(Int i = Lp.col_ptr(k)+1; i < Lp.col_ptr(k+1);i++) { const Int j = Lp.row_idx(i); @@ -1653,8 +1653,8 @@ namespace BaskerNS ) { - const Int leader_idx = S(0)(kid); - BASKER_MATRIX &C = thread_array(kid).C; + const Int leader_idx = S[0][kid]; + BASKER_MATRIX &C = thread_array[kid].C; Int nnz = 0; Int gbrow = 0; @@ -1672,24 +1672,24 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S(lvl)(kid); + Int A_col = S[lvl][kid]; Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S(bl)(kid) - S(0)(my_row_leader); + S[bl][kid] - S[0][my_row_leader]; //Int A_row = my_new_row; - Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); - if((S(bl)(kid) > 14) && - (S(bl)(kid) > LU_size(A_col)) && + Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); + if((S[bl][kid] > 14) && + (S[bl][kid] > LU_size(A_col)) && (lvl != 1)) { //printf("test cm %d %d %d \n", // kid, S(bl)(kid), LU_size(A_col)); - Int tm = (S(bl)(kid)+1)/16; - A_row = ((S(bl)(kid)+1) - (tm*16))%LU_size(A_col); + Int tm = (S[bl][kid]+1)/16; + A_row = ((S[bl][kid]+1) - (tm*16))%LU_size(A_col); } @@ -1708,12 +1708,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM(A_col)(A_row)); + Bp = &(AVM[A_col][A_row]); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM(A_col)(0)); + Bp = &(ALM[A_col][0]); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1724,8 +1724,8 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; //const Int brow = LL(leader_idx)(bl).srow; //const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; @@ -1789,11 +1789,11 @@ namespace BaskerNS //Int CM_idx = kid; - ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - INT_1DARRAY ws = LL(leader_idx)(bl).iws; - const Int ws_size = LL(leader_idx)(bl).ews_size; + ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + INT_1DARRAY ws = LL[leader_idx][bl].iws; + const Int ws_size = LL[leader_idx][bl].ews_size; // const Int brow = LL(leader_idx)(bl).srow; //NU //NDE - warning: unused - const Int nrow = LL(leader_idx)(bl).nrow; + const Int nrow = LL[leader_idx][bl].nrow; //Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk @@ -1883,12 +1883,12 @@ namespace BaskerNS ) { //Get needed variables - const Int L_col = S(lvl)(kid); + const Int L_col = S[lvl][kid]; const Int L_row = 0; - const Int U_col = S(lvl)(kid); + const Int U_col = S[lvl][kid]; const Int U_row = LU_size(U_col)-1; - const Int X_col = S(0)(kid); + const Int X_col = S[0][kid]; //Int col_idx_offset = 0; //can we get rid of now? @@ -1902,10 +1902,10 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL(L_col)(L_row); - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &L = LL[L_col][L_row]; + BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array(kid).C; + BASKER_MATRIX &B = thread_array[kid].C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1926,9 +1926,9 @@ namespace BaskerNS } */ - INT_1DARRAY ws = LL(X_col)(l+1).iws; - const Int ws_size = LL(X_col)(l+1).iws_size; - ENTRY_1DARRAY X = LL(X_col)(l+1).ews; + INT_1DARRAY ws = LL[X_col][l+1].iws; + const Int ws_size = LL[X_col][l+1].iws_size; + ENTRY_1DARRAY X = LL[X_col][l+1].ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -2201,17 +2201,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = L_col; - thread_array(kid).error_subblk = -1; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = L_col; + thread_array[kid].error_subblk = -1; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -2229,16 +2229,16 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; } else { - thread_array(kid).error_type = + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_blk = U_col; - thread_array(kid).error_subblk = U_row; - thread_array(kid).error_info = newsize; + thread_array[kid].error_blk = U_col; + thread_array[kid].error_subblk = U_row; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -2462,20 +2462,20 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S(lvl)(leader_id); + const Int L_col = S[lvl][leader_id]; Int L_row = 0; - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; + INT_1DARRAY ws = LL[X_col][X_row].iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; //const Int brow = U.srow; //const Int bcol = U.scol; @@ -2585,18 +2585,18 @@ namespace BaskerNS //const Int lteam_size = pow(2,l+1); //NDE - warning: unused // const Int L_col = S(lvl)(leader_id); //NDE - warning: unused // Int L_row = 0; //NDE - warning: unused - const Int U_col = S(lvl)(leader_id); + const Int U_col = S[lvl][leader_id]; Int U_row = LU_size(U_col)-1; - Int X_col = S(0)(leader_id); + Int X_col = S[0][leader_id]; Int X_row = l+1; //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU[U_col][U_row]; - INT_1DARRAY ws = LL(X_col)(X_row).iws; + INT_1DARRAY ws = LL[X_col][X_row].iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + ENTRY_1DARRAY X = LL[X_col][X_row].ews; if(kid == leader_id) { @@ -2621,11 +2621,11 @@ namespace BaskerNS const BASKER_BOOL lower ) { - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //If I an not a leader, then need to copy over if(kid != team_leader) @@ -2636,15 +2636,15 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused // Int p_sizeL = LL(leader_idx)(blk).p_size; //NDE - warning: unused // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; // const Int ws_size = LL(my_idx)(blk).iws_size; //NDE - warning: unused //Int p_size = LL(my_idx)(blk).p_size; - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; Int *color = &(ws[0]); // Int *pattern = &(color[ws_size]); //NDE - warning: unused // Int *stack = &(pattern[ws_size]); //NDE - warning: unused @@ -2682,7 +2682,7 @@ namespace BaskerNS - Int U_pattern_col = S(lvl)(kid); + Int U_pattern_col = S[lvl][kid]; Int U_pattern_row = BASKER_MAX_IDX; if(blk == l+1) @@ -2691,11 +2691,11 @@ namespace BaskerNS //S(0)(find_leader(kid,lvl)); //U_pattern_row = S(l+1)(kid) - //S(0)(my_pattern_leader); - U_pattern_row = S(l+1)(kid) - - S(0)(find_leader(kid,lvl-1)); + U_pattern_row = S[l+1][kid] - + S[0][find_leader(kid,lvl-1)]; } - Int L_pattern_col = S(lvl)(kid); + Int L_pattern_col = S[lvl][kid]; Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -2716,7 +2716,7 @@ namespace BaskerNS if(U_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &UP = LU(U_pattern_col)(U_pattern_row); + BASKER_MATRIX &UP = LU[U_pattern_col][U_pattern_row]; for(Int jj = UP.col_ptr(k); jj < UP.col_ptr(k+1); @@ -2730,7 +2730,7 @@ namespace BaskerNS }//if UPattern if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &LP = LL(L_pattern_col)(L_pattern_row); + BASKER_MATRIX &LP = LL[L_pattern_col][L_pattern_row]; for(Int jj = LP.col_ptr(k); jj < LP.col_ptr(k+1); jj++) @@ -2769,11 +2769,11 @@ namespace BaskerNS //BASKER_MATRIX &B = AVM(A_col)(A_col); - const Int my_idx = S(0)(kid); + const Int my_idx = S[0][kid]; //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S(0)(team_leader); + const Int leader_idx = S[0][team_leader]; //Int loop_col_idx = S(l)(kid); NU //#ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -2807,13 +2807,13 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; + ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused - Int p_sizeL = LL(leader_idx)(blk).p_size; + Int p_sizeL = LL[leader_idx][blk].p_size; // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; - INT_1DARRAY &ws = LL(my_idx)(blk).iws; - const Int ws_size = LL(my_idx)(blk).iws_size; + ENTRY_1DARRAY &X = LL[my_idx][blk].ews; + INT_1DARRAY &ws = LL[my_idx][blk].iws; + const Int ws_size = LL[my_idx][blk].iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -2845,7 +2845,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) + for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) { color[jj] = 0; @@ -2910,7 +2910,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL(my_idx)(blk).p_size = 0; + LL[my_idx][blk].p_size = 0; } else { @@ -2918,7 +2918,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL(leader_idx)(blk).p_size = p_sizeL; + LL[leader_idx][blk].p_size = p_sizeL; //p_size = 0; NOT USED }//over all blks } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index ccbd5a33b827..dc59708fe158 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -258,8 +258,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); Int k = btf_tabs(c); Int bcol = M.scol; @@ -294,9 +294,9 @@ namespace BaskerNS printf("Error: NaN diag in single factor\n"); } } - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } @@ -336,8 +336,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); - BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); + BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); Int bcol = M.scol; //JDB: brow hack: fix. @@ -373,9 +373,9 @@ namespace BaskerNS Mag rmin_ (0.0); //workspace - Int ws_size = thread_array(kid).iws_size; - INT_1DARRAY ws = thread_array(kid).iws; - ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + ENTRY_1DARRAY X = thread_array[kid].ews; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -580,9 +580,9 @@ namespace BaskerNS << " Column: " << k << std::endl; } - thread_array(kid).error_type = BASKER_ERROR_NAN; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_NAN; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } absv = abs(value); @@ -714,9 +714,9 @@ namespace BaskerNS pivot = normA_blk * eps; X(maxindex) = pivot; } else { - thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array(kid).error_blk = c; - thread_array(kid).error_info = k; + thread_array[kid].error_type = BASKER_ERROR_SINGULAR; + thread_array[kid].error_blk = c; + thread_array[kid].error_info = k; return BASKER_ERROR; } } @@ -780,16 +780,16 @@ namespace BaskerNS (long)btf_tabs(c), (long)btf_tabs(c+1), (long)(btf_tabs(c+1)-btf_tabs(c))); } - thread_array(kid).error_blk = c; + thread_array[kid].error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -804,16 +804,16 @@ namespace BaskerNS printf("blk: %ld column: %ld \n", (long)c, (long)k); } - thread_array(kid).error_blk = c; + thread_array[kid].error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; + thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array(kid).error_info = newsize; + thread_array[kid].error_type = BASKER_ERROR_REMALLOC; + thread_array[kid].error_info = newsize; return BASKER_ERROR; } } @@ -991,8 +991,8 @@ namespace BaskerNS ) { //printf("=======LOCAL REACH BTF SHORT CALLED (pattern[top=%d - 1] = %d) =====\n",(int)top, (int)j); - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,8 +1014,8 @@ namespace BaskerNS { //printf("=======LOCAL REACH BTF CALLED =====\n"); - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; /*{ printf("ws_size: %d \n", ws_size); @@ -1144,8 +1144,8 @@ namespace BaskerNS ) { - INT_1DARRAY ws = thread_array(kid).iws; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + Int ws_size = thread_array[kid].iws_size; /* printf("ws_size: %d \n", ws_size); @@ -1289,9 +1289,9 @@ namespace BaskerNS { const Entry zero (0.0); - INT_1DARRAY ws = thread_array(kid).iws; - ENTRY_1DARRAY X = thread_array(kid).ews; - Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array[kid].iws; + ENTRY_1DARRAY X = thread_array[kid].ews; + Int ws_size = thread_array[kid].iws_size; Int brow = L.srow; Int *color = &(ws(0)); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp index 69d06a6bd72e..82ea04be3754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp @@ -1096,11 +1096,19 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2) find_2D_convert(BTF_A); //now we can fill submatrices #ifdef BASKER_KOKKOS - kokkos_order_init_2D iO(this); - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this); + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + bool alloc = true; + //bool keep_zeros = true; + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else - //Comeback + //Comeback #endif #ifdef BASKER_TIMER double init_2d_time = scotch_timer.seconds(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index cc20d3b21e78..fd11208ea309 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -117,9 +117,11 @@ namespace BaskerNS // thread.team_rank()); Int kid = basker->t_get_kid(thread); #endif + printf( " * kokkos_sfactor_init_factor(%d) *\n",kid ); fflush(stdout); basker->t_init_factor(kid); + printf( " * kokkos_sfactor_init_factor(%d) done *\n",kid ); fflush(stdout); //This needs to be done earlier in ordering now //basker->t_init_2DA(kid); @@ -159,7 +161,7 @@ int Basker::sfactor() printf("Total NNZ: %ld \n", (long)global_nnz); printf(" > blk_matching = %d\n", (int)Options.blk_matching ); printf("----------------------------------\n"); - printf("\n"); + printf("\n"); fflush(stdout); } } @@ -169,28 +171,45 @@ int Basker::sfactor() } //Allocate Factorspace - //printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", - // btf_tabs_offset,allocate_nd_workspace); + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", + btf_tabs_offset,allocate_nd_workspace); fflush(stdout); + #endif if(btf_tabs_offset != 0 && allocate_nd_workspace) { #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif #else #endif } + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_workspace <<\n"); fflush(stdout); + #endif //if(btf_tabs_offset != 0) { //Allocate workspace #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_WORKSPACE typedef Kokkos::TeamPolicy TeamPolicy; kokkos_sfactor_init_workspace iWS(setup_flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(setup_flag, p); + } + #endif #endif } @@ -292,7 +311,9 @@ int Basker::sfactor() double time2 = 0.0; double time3 = 0.0; Kokkos::Timer timer1; + Kokkos::Timer timer2; timer.reset(); + timer2.reset(); #endif //split_num = num_threads/2; @@ -303,7 +324,7 @@ int Basker::sfactor() printf("\n --------------- OVER DOMS ---------------\n"); printf("\n"); } - #define SHYLU_BASKER_STREE_LIST + //#define SHYLU_BASKER_STREE_LIST std::vector stree_list (num_threads); #ifdef SHYLU_BASKER_STREE_LIST Kokkos::parallel_for( @@ -313,7 +334,7 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) #endif { - Int blk = S(0)(p); + Int blk = S[0][p]; if(Options.verbose == BASKER_TRUE) { printf(" ============= DOMAIN BLK (p=%d) ============\n",(int)p); @@ -323,34 +344,34 @@ int Basker::sfactor() //printf("\n\n STREE SIZE: %d \n", AL[blk][0].ncol); //printf("Here 0\n"); //Find nnz_counts for leafs - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; - e_tree (ALM(blk)(0), stree_p, 1); + e_tree (ALM[blk][0], stree_p, 1); #else - e_tree (ALM(blk)(0), stree, 1); + e_tree (ALM[blk][0], stree, 1); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - post_order(ALM(blk)(0), stree_p); + post_order(ALM[blk][0], stree_p); #else - post_order(ALM(blk)(0), stree); + post_order(ALM[blk][0], stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - col_count (ALM(blk)(0), stree_p); + col_count (ALM[blk][0], stree_p); #else - col_count (ALM(blk)(0), stree); + col_count (ALM[blk][0], stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif @@ -362,17 +383,17 @@ int Basker::sfactor() printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - leaf_assign_nnz(LL(blk)(0), stree_p, 0); - leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree_p, 0); + leaf_assign_nnz(LL[blk][0], stree_p, 0); + leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree_p, 0); #else - leaf_assign_nnz(LL(blk)(0), stree, 0); - leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); + leaf_assign_nnz(LL[blk][0], stree, 0); + leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree, 0); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); #endif } @@ -380,22 +401,26 @@ int Basker::sfactor() ); Kokkos::fence(); #endif + #ifdef BASKER_TIMER + double dom_time = timer2.seconds(); + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl; + #endif for(Int p = 0; p < num_threads; ++p) { //Do off diag - Int blk = S(0)(p); + Int blk = S[0][p]; #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; #endif for(Int l =0; l < tree.nlvls; l++) { - Int U_col = S(l+1)(p); + Int U_col = S[l+1][p]; //Note: Need to think more about this flow //Should be subtracted by how many times in the //future - Int my_row_leader = S(0)(find_leader(p,l)); + Int my_row_leader = S[0][find_leader(p,l)]; //Int my_new_row = // blk - my_row_leader; Int U_row = blk-my_row_leader; @@ -416,10 +441,10 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM(U_col)(U_row), stree_p, + U_blk_sfactor(AVM[U_col][U_row], stree_p, gScol[l], gSrow[glvl], off_diag); #else - U_blk_sfactor(AVM(U_col)(U_row), stree, + U_blk_sfactor(AVM[U_col][U_row], stree, gScol[l], gSrow[glvl], off_diag); #endif #ifdef BASKER_TIMER @@ -448,11 +473,11 @@ int Basker::sfactor() //printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); - L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); + L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); #else - U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); - L_assign_nnz(LL(blk)(l+1), stree, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); + L_assign_nnz(LL[blk][l+1], stree, fill_factor, 0); #endif #ifdef BASKER_TIMER time2 += timer1.seconds(); @@ -484,13 +509,17 @@ int Basker::sfactor() //over all the seps in a lvle #ifdef SHYLU_BASKER_STREE_LIST + //printf( " parallel for \n" ); Kokkos::parallel_for( "permute_col", p, KOKKOS_LAMBDA(const int pp) #else + //printf( " serial for \n" ); for(Int pp = 0; pp < p; pp++) #endif { - //printf( " -- level = %d separator = %d --\n",lvl,pp ); + #ifdef BASKER_TIMER + printf( " -- level = %d/%d separator = %d/%d --\n",lvl,tree.nlvls, pp,p ); fflush(stdout); + #endif //S blks Int ppp; ppp = pp*pow(tree.nparts, lvl+1); @@ -505,43 +534,50 @@ int Basker::sfactor() (long)U_col, (long)U_row, (long)lvl, (long)pp); #endif - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, //gScol[lvl], gSrow[pp]); - //printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); + #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; - S_blk_sfactor(ALM(U_col)(U_row), stree_p, - gScol(lvl), gSrow(pp)); + S_blk_sfactor(ALM[U_col][U_row], stree_p, + gScol[lvl], gSrow[pp]); #else - S_blk_sfactor(ALM(U_col)(U_row), stree, - gScol(lvl), gSrow(pp)); + S_blk_sfactor(ALM[U_col][U_row], stree, + gScol[lvl], gSrow[pp]); + #endif + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); #endif - //printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); //S_assign_nnz(LL[U_col][U_row], stree, 0); if(Options.verbose == BASKER_TRUE) { - printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); + printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LL(U_col)(U_row), stree_p, 0); + S_assign_nnz(LL[U_col][U_row], stree_p, 0); #else - S_assign_nnz(LL(U_col)(U_row), stree, 0); + S_assign_nnz(LL[U_col][U_row], stree, 0); #endif //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { - printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); + printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); + S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree_p, 0); #else - S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); + S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree, 0); + #endif + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",LU[U_col][LU_size(U_col)-1].nnz); fflush(stdout); #endif } #ifdef SHYLU_BASKER_STREE_LIST @@ -557,19 +593,20 @@ int Basker::sfactor() Int ppp; ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S(l+1)(ppp); - U_row = S(lvl+1)(ppp)%LU_size(U_col); + //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); + U_col = S[l+1][ppp]; + U_row = S[lvl+1][ppp]%LU_size(U_col); - Int my_row_leader = S(0)(find_leader(ppp,l)); + Int my_row_leader = S[0][find_leader(ppp,l)]; //Int my_new_row = // S(lvl+1)(ppp) - my_row_leader; - U_row = S(lvl+1)(ppp) - my_row_leader; + U_row = S[lvl+1][ppp] - my_row_leader; #ifdef BASKER_DEBUG_SFACTOR printf("offida sep, lvl: %d l: %d U_col: %d U_row: %d \n", lvl, l, U_col, U_row); @@ -578,11 +615,11 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM(U_col)(U_row), stree_p, - gScol(l), gSrow(pp), off_diag); + U_blk_sfactor(AVM[U_col][U_row], stree_p, + gScol[l], gSrow[pp], off_diag); #else - U_blk_sfactor(AVM(U_col)(U_row), stree, - gScol(l), gSrow(pp), off_diag); + U_blk_sfactor(AVM[U_col][U_row], stree, + gScol[l], gSrow[pp], off_diag); #endif //In symmetric will not need @@ -598,14 +635,15 @@ int Basker::sfactor() { printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + fflush(stdout); } double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); - L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); + L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); #else - U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); - L_assign_nnz(LL(inner_blk)(l-lvl), stree, fill_factor, 0); + U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); + L_assign_nnz(LL[inner_blk][l-lvl], stree, fill_factor, 0); #endif //printf("Here 1 \n"); } @@ -625,6 +663,9 @@ int Basker::sfactor() FREE(gScol); FREE(gSrow); + #ifdef BASKER_TIMER + std::cout << " >> symmetric_sfactor done << " << std::endl; + #endif return 0; }//end symmetric_symbolic() @@ -1151,7 +1192,6 @@ int Basker::sfactor() BASKER_SYMBOLIC_TREE &ST ) { -printf( " col_count:: view \n" ); //Still like to find a way to do this without transpose BASKER_MATRIX Mt; matrix_transpose(MV, Mt); @@ -2419,6 +2459,9 @@ printf( " col_count:: view \n" ); //printf("number of blks: %d \n", // btf_nblks-btf_tabs_offset); #endif + #ifdef BASKER_TIMER + printf( " > btf_last_dense(%s) <\n",(flag ? "true" : "false") ); fflush(stdout); + #endif Int max_blk_size = 0; #if defined(BASKER_SPLIT_A) @@ -2440,7 +2483,7 @@ printf( " col_count:: view \n" ); nnz = lblk_size*lblk_size; } //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - L_D(i).init_matrix("LBFT", + L_D[i].init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2448,9 +2491,9 @@ printf( " col_count:: view \n" ); nnz); //For pruning - L_D(i).init_pend(); + L_D[i].init_pend(); - U_D(i).init_matrix("UBFT", + U_D[i].init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2459,6 +2502,9 @@ printf( " col_count:: view \n" ); }//over all blks } #endif + #ifdef BASKER_TIMER + printf( " > top blocks done <\n" ); fflush(stdout); + #endif //Malloc L and U #ifdef BASKER_DEBUG_SFACTOR @@ -2487,7 +2533,7 @@ printf( " col_count:: view \n" ); nnz = lblk_size*lblk_size; } //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - LBTF(i-btf_tabs_offset).init_matrix("LBFT", + LBTF[i-btf_tabs_offset].init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2496,10 +2542,10 @@ printf( " col_count:: view \n" ); //For pruning //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); - LBTF(i-btf_tabs_offset).init_pend(); + LBTF[i-btf_tabs_offset].init_pend(); //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); - UBTF(i-btf_tabs_offset).init_matrix("UBFT", + UBTF[i-btf_tabs_offset].init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2511,6 +2557,9 @@ printf( " col_count:: view \n" ); //MALLOC workspace }//over all blks } + #ifdef BASKER_TIMER + printf( " > left blocks done <\n" ); fflush(stdout); + #endif //JDB: This needs to be fixed max_blk_size = BTF_D.nrow + BTF_C.nrow; @@ -2525,14 +2574,14 @@ printf( " col_count:: view \n" ); for(Int i = 0 ; i < num_threads; i++) { - thread_array(i).iws_size = max_blk_size; - thread_array(i).ews_size = max_blk_size; + thread_array[i].iws_size = max_blk_size; + thread_array[i].ews_size = max_blk_size; //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); + MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); } #ifdef BASKER_DEBUG_SFACTOR printf("Malloc Thread: %d iws: %d \n", @@ -2545,8 +2594,12 @@ printf( " col_count:: view \n" ); } } + #ifdef BASKER_TIMER + printf( " > btf_last_dense done <\n" ); + #endif }//end btf_last_dense() }//end namespace Bakser +#undef BASKER_TIMER #endif//endif BASKER_SFACTOR_NEWFRM_HPP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 64c041a6536c..ec7774a43f13 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -99,20 +99,20 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { - Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM(blk)(0), - LL(blk)(0), - LU(blk)(LU_size(blk)-1)); + Int blk = S[0][p]; + sfactor_nd_dom_estimate(ALM[blk][0], + LL[blk][0], + LU[blk][LU_size(blk)-1]); for(Int l=0; l < tree.nlvls; l++) { - Int U_col = S(l+1)(p); + Int U_col = S[l+1][p]; Int my_row_leader = find_leader(p,l); Int my_new_row = - blk - S(0)(my_row_leader); + blk - S[0][my_row_leader]; - Int U_row = (l==0)?(p%2):S(0)(p)%LU_size(U_col); + Int U_row = (l==0)?(p%2):S[0][p]%LU_size(U_col); if((blk > 14) && (blk > LU_size(U_col)) && (l!=0)) @@ -124,11 +124,11 @@ namespace BaskerNS //JDB TEST PASSED U_row = my_new_row; - sfactor_nd_upper_estimate(AVM(U_col)(U_row), - LU(U_col)(U_row)); + sfactor_nd_upper_estimate(AVM[U_col][U_row], + LU[U_col][U_row]); - sfactor_nd_lower_estimate(ALM(blk)(l+1), - LL(blk)(l+1)); + sfactor_nd_lower_estimate(ALM[blk][l+1], + LL[blk][l+1]); } // end for l @@ -138,41 +138,41 @@ namespace BaskerNS for(Int pp=0; pp < pow(tree.nparts, tree.nlvls-lvl-1); pp++) { Int ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S(lvl+1)(ppp); + Int U_col = S[lvl+1][ppp]; Int U_row = 0; - sfactor_nd_sep_estimate(ALM(U_col)(U_row), - LL(U_col)(U_row), - LU(U_col)(LU_size(U_col)-1)); + sfactor_nd_sep_estimate(ALM[U_col][U_row], + LL[U_col][U_row], + LU[U_col][LU_size(U_col)-1]); Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S(l+1)(ppp); + U_col = S[l+1][ppp]; Int my_row_leader = find_leader(ppp,l); Int my_new_row = - S(lvl+1)(ppp) - S(0)(my_row_leader); + S[lvl+1][ppp] - S[0][my_row_leader]; - U_row = S(lvl+1)(ppp)%LU_size(U_col); - if((S(lvl+1)(ppp) > 14) && - (S(lvl+1)(ppp) > LU_size(U_col)) + U_row = S[lvl+1][ppp]%LU_size(U_col); + if((S[lvl+1][ppp] > 14) && + (S[lvl+1][ppp] > LU_size(U_col)) ) { - Int tm = (S(lvl+1)(ppp)+1)/16; - U_row = ((S(lvl+1)(ppp)+1) - + Int tm = (S[lvl+1][ppp]+1)/16; + U_row = ((S[lvl+1][ppp]+1) - (tm*16))%LU_size(U_col); } //JDB TEST PASS U_row = my_new_row; - sfactor_nd_sep_upper_estimate(AVM(U_col)(U_row), - LU(U_col)(U_row)); + sfactor_nd_sep_upper_estimate(AVM[U_col][U_row], + LU[U_col][U_row]); sfactor_nd_sep_lower_estimate( - ALM(innerblk)(l-lvl), - LL(innerblk)(l-lvl)); + ALM[innerblk][l-lvl], + LL[innerblk][l-lvl]); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp index b01d3ec72632..b2fa1204cd86 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp @@ -293,7 +293,7 @@ namespace BaskerNS for(Int b = nblks_c-1; b>= 0; b--) { //---Lower solve - BASKER_MATRIX &LC = LBTF(b); + BASKER_MATRIX &LC = LBTF[b]; #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n btf b=%ld (%d x %d), LBTF(%d)\n", (long)b, (int)LC.nrow, (int)LC.ncol, (int)b); #endif @@ -303,7 +303,7 @@ namespace BaskerNS //printVec(y,gn); - BASKER_MATRIX &UC = UBTF(b); + BASKER_MATRIX &UC = UBTF[b]; //U(C)\x -> y upper_tri_solve(UC,x,y); @@ -420,7 +420,7 @@ namespace BaskerNS for(Int b = btf_top_tabs_offset-1; b>= 0; b--) { //L(C)\x -> y - BASKER_MATRIX &LC = L_D(b); + BASKER_MATRIX &LC = L_D[b]; lower_tri_solve(LC, x, y); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after L solve (b=%d)\n",b ); fflush(stdout); @@ -429,7 +429,7 @@ namespace BaskerNS #endif //U(C)\y -> x - BASKER_MATRIX &UC = U_D(b); + BASKER_MATRIX &UC = U_D[b]; upper_tri_solve(UC, y, x); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after U solve\n" ); fflush(stdout); @@ -476,7 +476,7 @@ namespace BaskerNS //Forward solve on A for(Int b = 0; b < tree.nblks; ++b) { - BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &L = LL[b][0]; //L\x -> y lower_tri_solve(L, x, y, scol_top); @@ -500,7 +500,7 @@ namespace BaskerNS //Update offdiag for(Int bb = 1; bb < LL_size(b); ++bb) { - BASKER_MATRIX &LD = LL(b)(bb); + BASKER_MATRIX &LD = LL[b][bb]; //x = LD*y; #ifdef BASKER_DEBUG_SOLVE_RHS char filename[200]; @@ -549,7 +549,7 @@ namespace BaskerNS #endif //U\y -> x - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; upper_tri_solve(U, y, x, scol_top); // NDE: y , x positions swapped... // seems role of x and y changed... #ifdef BASKER_DEBUG_SOLVE_RHS @@ -568,7 +568,7 @@ namespace BaskerNS #endif //y = UB*x; - BASKER_MATRIX &UB = LU(b)(bb); + BASKER_MATRIX &UB = LU[b][bb]; neg_spmv(UB, x, y, scol_top); #ifdef BASKER_DEBUG_SOLVE_RHS diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp index f950e9bd6132..bfd6e2460062 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp @@ -346,10 +346,10 @@ namespace BaskerNS // Update off-diag in the block-row before the diag solve for(int bb = LL_size(b)-1; bb > 0; bb--) { - BASKER_MATRIX &LD = LL(b)(bb); + BASKER_MATRIX &LD = LL[b][bb]; neg_spmv_perm_tr(LD, x, y, scol_top); // update y as mod. rhs, x as solution } - BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &L = LL[b][0]; if (L.nrow != 0 && L.ncol != 0) // Avoid degenerate case e.g. empty block following nd-partitioning lower_tri_solve_tr(L, y, x, scol_top); // x and y should be equal after in M range... } @@ -373,10 +373,10 @@ namespace BaskerNS for(Int bb = 0; bb < LU_size(b)-1; bb++) { // update offdiag corresponding to the block-row - BASKER_MATRIX &UB = LU(b)(bb); + BASKER_MATRIX &UB = LU[b][bb]; neg_spmv_tr(UB, x, y, scol_top); } - BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &U = LU[b][LU_size(b)-1]; if (U.nrow != 0 && U.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(U, x, y, scol_top); } @@ -410,7 +410,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of { for(Int b = 0; b < btf_top_tabs_offset; b++) { - BASKER_MATRIX &UC = U_D(b); + BASKER_MATRIX &UC = U_D[b]; if ( b > 0 ) spmv_BTF_tr(b, BTF_D, x, y, false); @@ -418,7 +418,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC, x, y); - BASKER_MATRIX &LC = L_D(b); + BASKER_MATRIX &LC = L_D[b]; if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC, x, y); @@ -462,7 +462,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (nblks_c > 0) { Int offset = 0; for(Int b = 0; b < nblks_c; b++) { - BASKER_MATRIX &UC = UBTF(b); + BASKER_MATRIX &UC = UBTF[b]; // Update off-diag // Update X with Y @@ -472,7 +472,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC,x,y); - BASKER_MATRIX &LC = LBTF(b); + BASKER_MATRIX &LC = LBTF[b]; if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC,x,y); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp index 1248d7472b0e..bd5bc82efdbc 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp @@ -54,7 +54,7 @@ namespace BaskerNS #ifndef BASKER_KOKKOS FREE_INT_1DARRAY(iws); FREE_ENTRY_1DARRAY(ews); - C.Finalize(); + //C.Finalize(); #endif } @@ -129,13 +129,12 @@ namespace BaskerNS BASKER_INLINE ~basker_tree() { - //Finalize(); + Finalize(); }//end ~basker_tree BASKER_INLINE void Finalize() { - //printf("basker_tree Finalize todo \n"); if(nroots > 0) { FREE_INT_1DARRAY(roots); @@ -267,7 +266,7 @@ namespace BaskerNS ~basker_symbolic_tree() { - //Finalize(); + Finalize(); }//end ~basker_symbolic_tree BASKER_INLINE diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index be4c146e9c83..81e3c78c7f9c 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -827,16 +827,16 @@ namespace BaskerNS Int U_view_size = (U_view_count(i) > 0 ? U_view_count(i) : 1); if (U_view_size > 0) { - MALLOC_MATRIX_1DARRAY(AVM(i), U_view_size); - MALLOC_MATRIX_1DARRAY(LU(i), U_view_size); + MALLOC_MATRIX_1DARRAY(AVM[i], U_view_size); + MALLOC_MATRIX_1DARRAY(LU[i], U_view_size); } //Malloc AL subarray // NOTE: size at least one to allow empty block Int L_view_size = (L_view_count(i) > 0 ? L_view_count(i): 1); if (L_view_size > 0) { - MALLOC_MATRIX_1DARRAY(ALM(i), L_view_size); - MALLOC_MATRIX_1DARRAY(LL(i), L_view_size); + MALLOC_MATRIX_1DARRAY(ALM[i], L_view_size); + MALLOC_MATRIX_1DARRAY(LL[i], L_view_size); } LU_size(i) = U_view_count(i); @@ -1056,7 +1056,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((L_row+1 < LL_size(L_col)) && - (tree.row_tabs(r_idx+1) == ALM(L_col)(L_row+1).srow)) + (tree.row_tabs(r_idx+1) == ALM[L_col][L_row+1].srow)) { //printf( " > ALM(%d)(%d).srow = %d, row_tab(%d) = %d\n",L_col,L_row+1,ALM(L_col)(L_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); L_row++; @@ -1071,7 +1071,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((U_row+1 < LU_size(U_col)) && - (tree.row_tabs(r_idx+1) == AVM(U_col)(U_row+1).srow)) + (tree.row_tabs(r_idx+1) == AVM[U_col][U_row+1].srow)) { //printf( " + AVM(%d)(%d).srow = %d, row_tab(%d) = %d\n",U_col,U_row+1,AVM(U_col)(U_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); U_row++; @@ -1095,8 +1095,8 @@ namespace BaskerNS //Get Matrix Ref - BASKER_MATRIX &Ltemp = ALM(L_col)(L_row); - BASKER_MATRIX &Utemp = AVM(U_col)(U_row); + BASKER_MATRIX &Ltemp = ALM[L_col][L_row]; + BASKER_MATRIX &Utemp = AVM[U_col][U_row]; Int bcol = Ltemp.scol; //diag blk @@ -1162,11 +1162,11 @@ namespace BaskerNS for(Int sb = 0; sb < LL_size(b); ++sb) { //printf( " ALM(%d)(%d).clean_col()\n",b,sb ); - ALM(b)(sb).clean_col(); + ALM[b][sb].clean_col(); } for(Int sb = 0; sb < LU_size(b); ++sb) { - AVM(b)(sb).clean_col(); + AVM[b][sb].clean_col(); } }//for - over all blks @@ -1178,6 +1178,7 @@ namespace BaskerNS BASKER_INLINE int Basker::sfactor_copy() { + printf( " .. sfactor_copy ..\n" ); fflush(stdout); //Reorder A; //Match order if(match_flag == BASKER_TRUE) @@ -1322,9 +1323,15 @@ namespace BaskerNS #ifdef BASKER_KOKKOS BASKER_BOOL keep_zeros = BASKER_FALSE; BASKER_BOOL alloc = alloc_BTFA; //BASKER_FALSE; - kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else //Comeback #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 6009e346f73b..8ea5c54c8e89 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -144,17 +144,18 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_KOKKOS_NOINIT Kokkos::ViewAllocateWithoutInitializing #define INT_RANK2DARRAY Kokkos::View #define INT_1DARRAY Kokkos::View -#define INT_2DARRAY Kokkos::View #define ENTRY_1DARRAY Kokkos::View -#define ENTRY_2DARRAY Kokkos::View #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY Kokkos::View -#define MATRIX_1DARRAY Kokkos::View -#define MATRIX_2DARRAY Kokkos::View -#define MATRIX_VIEW_1DARRAY Kokkos::View -#define MATRIX_VIEW_2DARRAY Kokkos::View -#define THREAD_1DARRAY Kokkos::View -#define THREAD_2DARRAY Kokkos::View + +#define INT_2DARRAY std::vector +#define ENTRY_2DARRAY std::vector +#define MATRIX_1DARRAY std::vector +#define MATRIX_2DARRAY std::vector +#define MATRIX_VIEW_1DARRAY std::vector +#define MATRIX_VIEW_2DARRAY std::vector +#define THREAD_1DARRAY std::vector +#define THREAD_2DARRAY std::vector #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +164,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s); \ + /*a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -172,7 +174,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s); \ + /*a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -181,7 +184,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1); \ + /*a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1);*/ \ + Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } @@ -189,7 +193,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_2DARRAY("int_2d",s); \ + /*a = INT_2DARRAY("int_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -198,7 +203,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s); \ + /*a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -207,7 +213,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_2DARRAY("entry_2d",s); \ + /*a = ENTRY_2DARRAY("entry_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -216,7 +223,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s); \ + /*a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -225,7 +233,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_2DARRAY("bool_2d", s); \ + /*a = BOOL_2DARRAY("bool_2d", s);*/ \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -234,7 +243,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_1DARRAY("matrix_1d",s); \ + /*a = MATRIX_1DARRAY("matrix_1d",s)*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -243,7 +253,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_2DARRAY("matrix_2d",s); \ + /*a = MATRIX_2DARRAY("matrix_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,7 +263,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s); \ + /*a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -261,7 +273,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s); \ + /*a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -270,7 +283,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_1DARRAY("thread_1d",s); \ + /*a = THREAD_1DARRAY("thread_1d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -279,9 +293,10 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_2DARRAY("thread_2d",s); \ + /*a = THREAD_2DARRAY("thread_2d",s);*/ \ + a.resize(s); \ if(a.data() == NULL) \ - throw std::bad_alloc(); \ + throw std::bad_alloc(); \ } \ } //RESIZE (with copy) @@ -334,77 +349,92 @@ enum BASKER_INCOMPLETE_CODE #define FREE(a) BASKER_NO_OP -#define FREE_INT_1DARRAY_PAIRS(a) \ - { \ - a = INT_1DARRAY_PAIRS(); \ +#define FREE_INT_1DARRAY_PAIRS(a) \ + { \ + /*a = INT_1DARRAY_PAIRS();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_1DARRAY(a) \ - { \ - a = INT_1DARRAY(); \ +#define FREE_INT_1DARRAY(a) \ + { \ + /*a = INT_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_RANK2DARRAY(a) \ - { \ - a = INT_RANK2DARRAY(); \ +#define FREE_INT_RANK2DARRAY(a) \ + { \ + /*a = INT_RANK2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_INT_2DARRAY(a,n) \ - { \ - a = INT_2DARRAY(); \ +#define FREE_INT_2DARRAY(a,n) \ + { \ + /*a = INT_2DARRAY();*/ \ + a.resize(0); \ } -#define FREE_ENTRY_1DARRAY(a) \ - { \ - a = ENTRY_1DARRAY(); \ +#define FREE_ENTRY_1DARRAY(a) \ + { \ + /*a = ENTRY_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_ENTRY_2DARRAY(a,n) \ - { \ - a = ENTRY_2DARRAY(); \ +#define FREE_ENTRY_2DARRAY(a,n) \ + { \ + /*a = ENTRY_2DARRAY();*/ \ + a.resize(0); \ } -#define FREE_BOOL_1DARRAY(a) \ - { \ - a = BOOL_1DARRAY(); \ +#define FREE_BOOL_1DARRAY(a) \ + { \ + /*a = BOOL_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_2DARRAY(a,n) \ - { \ - a = BOOL_2DARRAY(); \ +#define FREE_BOOL_2DARRAY(a,n) \ + { \ + /*a = BOOL_2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_1DARRAY(a) \ - { \ - a = MATRIX_1DARRAY(); \ +#define FREE_MATRIX_1DARRAY(a) \ + { \ + /*a = MATRIX_1DARRAY();*/ \ + a.resize(0); \ } -#define FREE_MATRIX_2DARRAY(a,n) \ - { \ - a = MATRIX_2DARRAY(); \ +#define FREE_MATRIX_2DARRAY(a,n) \ + { \ + /*a = MATRIX_2DARRAY();*/ \ + a.resize(0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ - { \ - a = MATRIX_VIEW_1DARRAY(); \ + { \ + /*a = MATRIX_VIEW_1DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ - { \ - a = MATRIX_VIEW_2DARRAY(); \ +#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ + { \ + /*a = MATRIX_VIEW_2DARRAY();*/ \ + a.resize(0); \ } #define FREE_THREAD_1DARRAY(a) \ - { \ - a = THREAD_1DARRAY(); \ + { \ + /*a = THREAD_1DARRAY();*/ \ + a.resize(0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - a = TRHEAD_2DARRAY(); \ +#define FREE_THREAD_2DARRAY(a,n) \ + { \ + /*a = TRHEAD_2DARRAY();*/ \ + Kokkos::resize(a,0); \ } -#else +#else // not BASKER_KOKKOS + //Execution Space #define BASKER_EXE_SPACE void* //ReMacro Basker Classes diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 130f62ea6127..9cf52f3db66d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -252,11 +252,11 @@ namespace BaskerNS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -291,12 +291,11 @@ namespace BaskerNS #ifdef BASKER_KOKKOS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; - Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + Kokkos::parallel_for(TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -328,7 +327,7 @@ namespace BaskerNS { for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = LBTF(b-btf_tabs_offset); + BASKER_MATRIX &L = LBTF[b-btf_tabs_offset]; L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -343,7 +342,7 @@ namespace BaskerNS #if defined(BASKER_SPLIT_A) for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = L_D(b); + BASKER_MATRIX &L = L_D[b]; L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -359,7 +358,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { @@ -368,8 +367,8 @@ namespace BaskerNS b, row, kid, LL[b][row].nnz); #endif - LL(b)(row).clear_pend(); - LL(b)(row).nnz = LL(b)(row).mnnz; + LL[b][row].clear_pend(); + LL[b][row].nnz = LL[b][row].mnnz; }//end over all row }//end select which thread @@ -379,7 +378,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -388,9 +387,9 @@ namespace BaskerNS #endif //LU(b)(LU_size(b)-1).nnz = 0; - for(Int kk = 0; kk < LU(b)(LU_size(b)-1).ncol+1; kk++) + for(Int kk = 0; kk < LU[b][LU_size(b)-1].ncol+1; kk++) { - LU(b)(LU_size(b)-1).col_ptr(kk) = 0; + LU[b][LU_size(b)-1].col_ptr(kk) = 0; } /* @@ -400,16 +399,16 @@ namespace BaskerNS LU(b)(LU_size(b)-1).mnnz); */ - LU(b)(LU_size(b)-1).nnz = LU(b)(LU_size(b)-1).mnnz; + LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S(0)(my_row_leader); + b - S[0][my_row_leader]; - Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); //JDB TEST PASS U_row = my_new_row; @@ -420,9 +419,9 @@ namespace BaskerNS LU[U_col][U_row].nnz); #endif - for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) + for(Int kk = 0; kk < LU[U_col][U_row].ncol+1; kk++) { - LU(U_col)(U_row).col_ptr(kk) = 0; + LU[U_col][U_row].col_ptr(kk) = 0; } /* printf("flipU (%d,%d) %d %d \n", @@ -431,7 +430,7 @@ namespace BaskerNS LU(U_col)(U_row).mnnz); */ - LU(U_col)(U_row).nnz = LU(U_col)(U_row).mnnz; + LU[U_col][U_row].nnz = LU[U_col][U_row].mnnz; //LU(U_col)(U_row).nnz = 0; }//over inner lvls @@ -455,13 +454,13 @@ namespace BaskerNS Kokkos::Timer timer_init_matrixL; Kokkos::Timer timer_fill_matrixL; timer_initL.reset(); + printf( " > t_init_factor( tid = %d, nlvls = %d ) <\n",kid,tree.nlvls+1 ); fflush(stdout); #endif - //printf( " > t_init_factor( tid = %d ) <\n",kid ); for(Int lvl = 0; lvl < tree.nlvls+1; lvl++) { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { @@ -472,34 +471,39 @@ namespace BaskerNS #ifdef BASKER_TIMER timer_init_matrixL.reset(); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL[b][row].nnz, (int)LL[b][row].mnnz); fflush(stdout); #endif - //printf( " lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); - LL(b)(row).init_matrix("Loffdig", - LL(b)(row).srow, - LL(b)(row).nrow, - LL(b)(row).scol, - LL(b)(row).ncol, - LL(b)(row).nnz); + LL[b][row].init_matrix("Loffdig", + LL[b][row].srow, + LL[b][row].nrow, + LL[b][row].scol, + LL[b][row].ncol, + LL[b][row].nnz); #ifdef BASKER_TIMER + printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); #endif //Fix when this all happens in the future if(Options.incomplete == BASKER_TRUE) { - LL(b)(row).init_inc_lvl(); + LL[b][row].init_inc_lvl(); } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); + printf( " ++ zero out (%d) ++\n",int(LL[b][row].col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - Kokkos::deep_copy(LL(b)(row).col_ptr, 0); + LL[b][row].init_ptr(); + //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL[b][row].ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - //printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); - LL(b)(row).init_pend(); - + LL[b][row].init_pend(); + #ifdef BASKER_TIMER + printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); + #endif }//end over all row }//end select which thread }//end for over all lvl @@ -508,6 +512,7 @@ namespace BaskerNS std::cout << " > Basker t_init_factor::initL(" << kid << "): time: " << initL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::initMatrix(" << kid << "): time: " << init_matrixL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::fillMatrix(" << kid << "): time: " << fill_matrixL_time << std::endl; + fflush(stdout); #endif //U @@ -519,7 +524,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -528,25 +533,26 @@ namespace BaskerNS #endif //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); - LU(b)(LU_size(b)-1).init_matrix("Udiag", - LU(b)(LU_size(b)-1).srow, - LU(b)(LU_size(b)-1).nrow, - LU(b)(LU_size(b)-1).scol, - LU(b)(LU_size(b)-1).ncol, - LU(b)(LU_size(b)-1).nnz); + LU[b][LU_size(b)-1].init_matrix("Udiag", + LU[b][LU_size(b)-1].srow, + LU[b][LU_size(b)-1].nrow, + LU[b][LU_size(b)-1].scol, + LU[b][LU_size(b)-1].ncol, + LU[b][LU_size(b)-1].nnz); //LU(b)(LU_size(b)-1).fill(); - Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); + LU[b][LU_size(b)-1].init_ptr(); + //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S(0)(my_row_leader); + b - S[0][my_row_leader]; - Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); if( (b > 14) && // NDE: Why is 14 specifically used here? (b > LU_size(U_col)) && @@ -577,19 +583,20 @@ namespace BaskerNS #endif //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz); - LU(U_col)(U_row).init_matrix("Uoffdiag", - LU(U_col)(U_row).srow, - LU(U_col)(U_row).nrow, - LU(U_col)(U_row).scol, - LU(U_col)(U_row).ncol, - LU(U_col)(U_row).nnz); + LU[U_col][U_row].init_matrix("Uoffdiag", + LU[U_col][U_row].srow, + LU[U_col][U_row].nrow, + LU[U_col][U_row].scol, + LU[U_col][U_row].ncol, + LU[U_col][U_row].nnz); //LU(U_col)(U_row).fill(); - Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); + LU[U_col][U_row].init_ptr(); + //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { - LU(U_col)(U_row).init_inc_lvl(); + LU[U_col][U_row].init_inc_lvl(); } }//over inner lvls @@ -624,15 +631,15 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int row = 0; row < LL_size(b); row++) { #ifdef BASKER_DEBUG_INIT printf("ALM Factor Init: %d %d , kid: %d, nnz: %d nrow: %d ncol: %d \n", - b, row, kid, ALM(b)(row).nnz, - ALM(b)(row).nrow, - ALM(b)(row).ncol); + b, row, kid, ALM[b][row].nnz, + ALM[b][row].nrow, + ALM[b][row].ncol); #endif /*if (kid == 1) @@ -647,7 +654,7 @@ namespace BaskerNS printf("ALM(%d,%d: %dx%d) alloc with A: kid=%d btf=%d\n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid, Options.btf); #endif - ALM(b)(row).convert2D(A, alloc, kid); + ALM[b][row].convert2D(A, alloc, kid); } else { @@ -656,7 +663,7 @@ namespace BaskerNS printf("ALM(%d,%d, %dx%d) alloc (btf) with BTF_A: kid=%d \n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid); #endif - ALM(b)(row).convert2D(BTF_A, alloc, kid); + ALM[b][row].convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < ALM(b)(row).ncol; j++) { @@ -676,14 +683,14 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", b, LU_size(b)-1, kid, - AVM(b)(LU_size(b)-1).nnz, - AVM(b)(LU_size(b)-1).nrow, - AVM(b)(LU_size(b)-1).ncol); + AVM[b][LU_size(b)-1].nnz, + AVM[b][LU_size(b)-1].nrow, + AVM[b][LU_size(b)-1].ncol); #endif /*if (kid == 1) { @@ -692,13 +699,13 @@ namespace BaskerNS }*/ if(Options.btf == BASKER_FALSE) { - AVM(b)(LU_size(b)-1).convert2D(A, alloc, kid); + AVM[b][LU_size(b)-1].convert2D(A, alloc, kid); } else { //printf("Using BTF AU\n"); //printf(" > kid=%d: convert2D AVM(%d,%d)\n", kid, b, LU_size(b)-1); - AVM(b)(LU_size(b)-1).convert2D(BTF_A, alloc, kid); + AVM[b][LU_size(b)-1].convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < AVM(b)(LU_size(b)-1).ncol; j++) { @@ -715,10 +722,10 @@ namespace BaskerNS //TEST Int my_leader = find_leader(kid,l-1); - Int my_leader_row = S(0)(my_leader); + Int my_leader_row = S[0][my_leader]; //Int my_col_size = pow(2,l); Not used Int my_new_row = - (S(lvl)(kid) - my_leader_row); + (S[lvl][kid] - my_leader_row); //my_new_row = my_new_row%my_col_size; /* @@ -729,7 +736,7 @@ namespace BaskerNS my_col_size, my_new_row); */ - Int U_col = S(l)(kid); + Int U_col = S[l][kid]; Int U_row = my_new_row; //Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); @@ -755,9 +762,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init AUM: %d %d lvl: %d l: %d kid: %d nnz: %d nrow: %d ncol: %d \n", U_col, U_row, lvl, l, kid, - AVM(U_col)(U_row).nnz, - AVM(U_col)(U_row).nrow, - AVM(U_col)(U_row).ncol); + AVM[U_col][U_row].nnz, + AVM[U_col][U_row].nrow, + AVM[U_col][U_row].ncol); #endif #if 0 @@ -775,9 +782,9 @@ namespace BaskerNS { //printf(" %d: Using BTF AVM(%d,%d), %dx%d\n",kid,U_col,U_row, AVM(U_col)(U_row).nrow,AVM(U_col)(U_row).ncol); //printf("2nd convert AVM: %d %d size:%d kid: %d\n", - // U_col, U_row, AVM(U_col)(U_row).nnz, + // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); - AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); + AVM[U_col][U_row].convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); } @@ -807,22 +814,22 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S(lvl)(kid); + Int b = S[lvl][kid]; for(Int l = 0; l < LL_size(b); l++) { //defining here - LL(b)(l).iws_size = LL(b)(l).nrow; + LL[b][l].iws_size = LL[b][l].nrow; //This can be made smaller, see notes in Sfactor_old - LL(b)(l).iws_mult = 5; - LL(b)(l).ews_size = LL(b)(l).nrow; + LL[b][l].iws_mult = 5; + LL[b][l].ews_size = LL[b][l].nrow; //This can be made smaller, see notes in sfactor_old - LL(b)(l).ews_mult = 2; + LL[b][l].ews_mult = 2; - Int iws_size = LL(b)(l).iws_size; - Int iws_mult = LL(b)(l).iws_mult; - Int ews_size = LL(b)(l).ews_size; - Int ews_mult = LL(b)(l).ews_mult; + Int iws_size = LL[b][l].iws_size; + Int iws_mult = LL[b][l].iws_mult; + Int ews_size = LL[b][l].ews_size; + Int ews_mult = LL[b][l].ews_mult; if(iws_size > max_sep_size) { @@ -835,10 +842,10 @@ namespace BaskerNS } BASKER_ASSERT((iws_size*iws_mult)>0, "util iws"); - MALLOC_INT_1DARRAY(LL(b)(l).iws, iws_size*iws_mult); + MALLOC_INT_1DARRAY(LL[b][l].iws, iws_size*iws_mult); for(Int i=0; i 0) { BASKER_ASSERT((ews_size*ews_mult)>0, "util ews"); - MALLOC_ENTRY_1DARRAY(LL(b)(l).ews, ews_size*ews_mult); + MALLOC_ENTRY_1DARRAY(LL[b][l].ews, ews_size*ews_mult); for(Int i=0; i::find_leader(Int kid, Int l) { l = l+1; - Int my_token = S(l)(kid); + Int my_token = S[l][kid]; Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S(l)(my_loc) != my_token) + if(S[l][my_loc] != my_token) { my_loc++; break; @@ -2477,4 +2485,5 @@ namespace BaskerNS }//end namespace basker +#undef BASKER_TIMER #endif //end basker_util_hpp From e484f62bbdade30959fa3541503bd993ea7faf64 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 24 Oct 2024 16:12:13 -0600 Subject: [PATCH 2/5] ShyLU - Basker : memory tune Signed-off-by: iyamazaki --- .../basker/src/shylubasker_decl.hpp | 7 -- .../basker/src/shylubasker_nfactor.hpp | 17 +--- .../basker/src/shylubasker_sfactor.hpp | 87 +++++++++++-------- .../basker/src/shylubasker_structs.hpp | 1 - .../basker/src/shylubasker_types.hpp | 33 +------ .../basker/src/shylubasker_util.hpp | 13 ++- 6 files changed, 69 insertions(+), 89 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp index 94f4ba1df086..f9b33e325bd7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp @@ -1494,13 +1494,6 @@ namespace BaskerNS //end NDE - - //RHS and solutions (These are not used anymore) - ENTRY_2DARRAY rhs; - ENTRY_2DARRAY sol; - Int nrhs; - - BASKER_TREE part_tree; BASKER_TREE tree; BASKER_SYMBOLIC_TREE stree; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp index d2c6a5690528..cef593230d5e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp @@ -171,13 +171,9 @@ namespace BaskerNS }//end while if(Options.verbose == BASKER_TRUE) { - printf("Time DOMAIN: %lf \n", timer.seconds()); + printf("Time DOMAIN: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time DOMAIN: %lf \n", timer.seconds()); - timer.reset(); - #endif #else// else basker_kokkos #pragma omp parallel @@ -282,13 +278,9 @@ namespace BaskerNS //printf( " End Sep: info = %d (%d, %d)\n",info,BASKER_SUCCESS,BASKER_ERROR ); if(Options.verbose == BASKER_TRUE) { - printf("Time SEP: %lf \n", timer.seconds()); + printf("Time SEP: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time SEP: %lf \n", timer.seconds()); - timer.reset(); - #endif } // ---------------------------------------------------------------------------------------- // @@ -363,11 +355,8 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf("Time BTF: %lf \n", timer.seconds()); + printf("Time BTF: %lf \n\n", timer.seconds()); } - #ifdef BASKER_TIMER - printf("Time BTF: %lf \n", timer.seconds()); - #endif }//end btf call Kokkos::Timer tzback; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index fd11208ea309..d48f0e720a7e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -381,7 +381,7 @@ int Basker::sfactor() if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); - printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); + printf( " >> leaf_assign_nnz(LU(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); @@ -403,7 +403,7 @@ int Basker::sfactor() #endif #ifdef BASKER_TIMER double dom_time = timer2.seconds(); - std::cout << " DOMAIN BLKs done : " << dom_time << std::endl; + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl << std::endl; #endif for(Int p = 0; p < num_threads; ++p) @@ -462,16 +462,15 @@ int Basker::sfactor() //Assign nnz counts for leaf off-diag //U_assign_nnz(LU[U_col][U_row], stree, 0); //L_assign_nnz(LL[blk][l+1], stree, 0); - if(Options.verbose == BASKER_TRUE) - { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)blk,(int)l+1); - } #ifdef BASKER_TIMER timer1.reset(); #endif - //printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; + if(Options.verbose == BASKER_TRUE) + { + printf( " ++ U_assign_nnz(LU(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)U_col,(int)U_row, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + } #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); @@ -490,7 +489,7 @@ int Basker::sfactor() std::cout << " >> symmetric_sfactor::domain : " << timer.seconds() << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::postorder : " << time1_2 << " + " << time1_3 << " + " << time1 << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::init : " << time2 << " seconds" << std::endl; - std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl; + std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl << std::endl; timer.reset(); #endif @@ -631,13 +630,13 @@ int Basker::sfactor() //Assign nnz + double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; if(Options.verbose == BASKER_TRUE) { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + printf( " ++ leaf_assign_nnz(LU(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)U_col,(int)U_row, (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ leaf_assign_nnz(LL(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)inner_blk,(int)(l-lvl), (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); fflush(stdout); } - double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); @@ -2260,6 +2259,9 @@ int Basker::sfactor() Int option ) { + #ifdef BASKER_TIMER + printf("leaf_assign_nnz:\n"); + #endif if(option == 0) { const Int Int_MAX = std::numeric_limits::max(); @@ -2268,19 +2270,23 @@ int Basker::sfactor() for(Int i = 0; i < M.ncol; i++) { if (t_nnz <= Int_MAX - ST.col_counts[i]) { + #ifdef BASKER_TIMER + //printf( " > %d: %d += %d\n",i,t_nnz, ST.col_counts[i] ); + #endif t_nnz += ST.col_counts[i]; } else { // let's just hope it is enough, if overflow break; } } - #ifdef BASKER_DEBUG_SFACTOR - printf("leaf nnz: %ld \n", (long)t_nnz); + #ifdef BASKER_TIMER + printf(" > leaf nnz: (%ld + %ld) / 2 = %ld\n", (long)t_nnz,(long)M.ncol,(long)(t_nnz+M.ncol)/2); #endif + t_nnz = long(t_nnz+M.ncol)/2; //double nnz_shoulder = 1.05; double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; // used to boost fill estimate - Int temp = fill_factor*t_nnz; + Int temp = fill_factor*t_nnz; // assuming (t_nnz/2) as triangular part if (temp > t_nnz) { M.nnz = temp; } else { @@ -2298,8 +2304,8 @@ int Basker::sfactor() } if(Options.verbose == BASKER_TRUE) { - printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld) with fill-factor x(%d+%f = %f)\n", + (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol,(int)BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } } }//end assign_leaf_nnz @@ -2330,12 +2336,12 @@ int Basker::sfactor() } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("U_assing_nnz: %ld \n", t_nnz); #endif //double fill_factor = 1.05; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2352,8 +2358,8 @@ int Basker::sfactor() #endif if(Options.verbose == BASKER_TRUE) { - printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f + %f = %f), M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); } } }//end assign_upper_nnz @@ -2384,13 +2390,13 @@ int Basker::sfactor() } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("L_assign_nnz: %ld \n", t_nnz); #endif // double fill_factor = 2.05; double old_nnz = M.nnz; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2407,8 +2413,8 @@ int Basker::sfactor() } if(Options.verbose == BASKER_TRUE) { - printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %e + %e = %e), M.nnz = %ld -> %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); + printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld -> %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); } } }//end assign_lower_nnz @@ -2482,7 +2488,9 @@ int Basker::sfactor() if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif L_D[i].init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2493,6 +2501,9 @@ int Basker::sfactor() //For pruning L_D[i].init_pend(); + #ifdef BASKER_TIMER + printf( " U_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif U_D[i].init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2532,7 +2543,9 @@ int Basker::sfactor() if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif LBTF[i-btf_tabs_offset].init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2544,7 +2557,9 @@ int Basker::sfactor() //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); LBTF[i-btf_tabs_offset].init_pend(); - //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif UBTF[i-btf_tabs_offset].init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2579,18 +2594,18 @@ int Basker::sfactor() //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); + #ifdef BASKER_TIMER + printf("Malloc Thread: %d iws: %d \n", + i, (thread_array[i].iws_size* + thread_array[i].iws_mult)); + printf("Malloc Thread: %d ews: %d \n", + i, (thread_array[i].ews_size* + thread_array[i].ews_mult)); + #endif if (max_blk_size > 0) { MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); } - #ifdef BASKER_DEBUG_SFACTOR - printf("Malloc Thread: %d iws: %d \n", - i, (thread_array(i).iws_size* - thread_array(i).iws_mult)); - printf("Malloc Thread: %d ews: %d \n", - i, (thread_array(i).ews_size* - thread_array(i).ews_mult)); - #endif } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp index bd5bc82efdbc..ef1e29d597e4 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_structs.hpp @@ -54,7 +54,6 @@ namespace BaskerNS #ifndef BASKER_KOKKOS FREE_INT_1DARRAY(iws); FREE_ENTRY_1DARRAY(ews); - //C.Finalize(); #endif } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 8ea5c54c8e89..c55c222ec7c7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -90,7 +90,7 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_INC_TOL_VALUE 0.0001 //MACRO INC FILL (this will become dynamic in the future) -#define BASKER_FILL_USER 1.00 +#define BASKER_FILL_USER 0.00 #define BASKER_FILL_LESTIMATE 1.50 #define BASKER_FILL_UESTIMATE 1.50 #define BASKER_FILL_LLOWERESTIMATE 2.00 @@ -149,13 +149,12 @@ enum BASKER_INCOMPLETE_CODE #define BOOL_2DARRAY Kokkos::View #define INT_2DARRAY std::vector -#define ENTRY_2DARRAY std::vector +#define ENTRY_2DARRAY std::vector #define MATRIX_1DARRAY std::vector #define MATRIX_2DARRAY std::vector #define MATRIX_VIEW_1DARRAY std::vector #define MATRIX_VIEW_2DARRAY std::vector #define THREAD_1DARRAY std::vector -#define THREAD_2DARRAY std::vector #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -193,7 +192,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_2DARRAY("int_2d",s);*/ \ + /*a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s);*/ \ a.resize(s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -289,16 +288,7 @@ enum BASKER_INCOMPLETE_CODE throw std::bad_alloc(); \ } \ } -#define MALLOC_THREAD_2DARRAY(a,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ - if (s > 0) { \ - /*a = THREAD_2DARRAY("thread_2d",s);*/ \ - a.resize(s); \ - if(a.data() == NULL) \ - throw std::bad_alloc(); \ - } \ - } + //RESIZE (with copy) #define RESIZE_1DARRAY(a,os,s) \ { \ @@ -427,12 +417,6 @@ enum BASKER_INCOMPLETE_CODE a.resize(0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - /*a = TRHEAD_2DARRAY();*/ \ - Kokkos::resize(a,0); \ - } - #else // not BASKER_KOKKOS //Execution Space @@ -458,7 +442,6 @@ enum BASKER_INCOMPLETE_CODE #define MATRIX_VIEW_1DARRAY BASKER_MATRIX_VIEW* #define MATRIX_VIEW_2DARRAY BASKER_MATRIX_VIEW** #define THREAD_1DARRAY BASKER_THREAD* -#define THREAD_2DARRAY BASKER_THREAD** //Macro Memory Calls //Malloc @@ -473,7 +456,6 @@ enum BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -#define MALLOC_THREAD_2DARRAY(a,s) a = new THREAD_1DARRAY [s] //Resize (copy old data) (come back and add) #define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP #define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP @@ -555,13 +537,6 @@ enum BASKER_INCOMPLETE_CODE FREE(a); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - for(BASKER_INT MACRO_I = 0; MACRO_I < s; MACRO_I++) \ - FREE(a[MACRO_I]); \ - FREE(a); \ - } - #endif //end ifdef BASKER_KOKKOS //Inline command diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 9cf52f3db66d..3691d73c63ba 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -532,7 +532,11 @@ namespace BaskerNS LU[b][LU_size[b]-1].nnz); #endif - //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); + #ifdef BASKER_TIMER + printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, + (int)LU[b][LU_size(b)-1].nrow,(int)LU[b][LU_size(b)-1].ncol,(int)LU[b][LU_size(b)-1].nnz, (int)LU[b][LU_size(b)-1].mnnz, + (int)LU[b][LU_size(b)-1].srow,(int)LU[b][LU_size(b)-1].scol); + #endif LU[b][LU_size(b)-1].init_matrix("Udiag", LU[b][LU_size(b)-1].srow, LU[b][LU_size(b)-1].nrow, @@ -582,7 +586,12 @@ namespace BaskerNS LU[U_col][U_row].nnz); #endif - //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz); + #ifdef BASKER_TIMER + printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, + (int)LU[U_col][U_row].nrow,(int)LU[U_col][U_row].ncol, + (int)LU[U_col][U_row].nnz, (int)LU[U_col][U_row].mnnz, + (int)LU[U_col][U_row].srow,(int)LU[U_col][U_row].scol); + #endif LU[U_col][U_row].init_matrix("Uoffdiag", LU[U_col][U_row].srow, LU[U_col][U_row].nrow, From 03b1ab0362ca3aa4c620146b57229652570d9c15 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 25 Oct 2024 17:33:32 -0600 Subject: [PATCH 3/5] ShyLU - Basker : replace std::vector with SequentialHostInit Signed-off-by: iyamazaki --- .../basker/src/shylubasker_error_manager.hpp | 150 +++++++-------- .../basker/src/shylubasker_nfactor_blk.hpp | 74 ++++---- .../src/shylubasker_nfactor_blk_inc.hpp | 171 +++++++++--------- .../basker/src/shylubasker_nfactor_col.hpp | 148 +++++++-------- .../basker/src/shylubasker_nfactor_col2.hpp | 44 ++--- .../src/shylubasker_nfactor_col_inc.hpp | 158 ++++++++-------- .../basker/src/shylubasker_nfactor_diag.hpp | 58 +++--- .../basker/src/shylubasker_sfactor.hpp | 64 +++---- .../basker/src/shylubasker_sfactor_inc.hpp | 24 +-- .../basker/src/shylubasker_tree.hpp | 22 +-- .../basker/src/shylubasker_types.hpp | 85 +++------ .../basker/src/shylubasker_util.hpp | 68 +++---- 12 files changed, 513 insertions(+), 553 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index 84cbb8b801b7..d9695c6e5c78 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -46,66 +46,66 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { threads_start(ti) = BASKER_MAX_IDX; continue; - } else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + } else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK SINGULAR: blk=" << thread_array[ti].error_blk + << " DOMBLK SINGULAR: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " DOMBLK NOMALLOC : blk=" << thread_array[ti].error_blk + << " DOMBLK NOMALLOC : blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_dom_error error_blk"); + BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_dom_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREAD: " << ti - << " DOMBLK MALLOC : blk=" << thread_array[ti].error_blk - << " subblk=" << thread_array[ti].error_subblk - << " newsize=" << thread_array[ti].error_info + << " DOMBLK MALLOC : blk=" << thread_array(ti).error_blk + << " subblk=" << thread_array(ti).error_subblk + << " newsize=" << thread_array(ti).error_info << std::endl; } //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array[ti].error_subblk != BASKER_MAX_IDX) + if(thread_array(ti).error_subblk != BASKER_MAX_IDX) { - BASKER_ASSERT(thread_array[ti].error_info > 0, "L) newsize not big enough"); - resize_L = thread_array[ti].error_info; + BASKER_ASSERT(thread_array(ti).error_info > 0, "L) newsize not big enough"); + resize_L = thread_array(ti).error_info; //if L is already bigger and U, //We will want re size U as, well - if(thread_array[ti].error_subblk == 0) + if(thread_array(ti).error_subblk == 0) { - Int blkcol = thread_array[ti].error_blk; + Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; if(LL[blkcol][0].nnz >= LU[blkcol][blkUrow].nnz) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; } }//if - a domain } //We don't care about the other way since, //L is already checked before U. - if(thread_array[ti].error_subblk == -1) + if(thread_array(ti).error_subblk == -1) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; } //Resize L, if resize_L != -1 (meaning realloc-L is requested) @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][thread_array[ti].error_subblk]; + LL[thread_array(ti).error_blk][thread_array(ti).error_subblk]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU[thread_array[ti].error_blk][0]; + LU[thread_array(ti).error_blk][0]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][0]; + LL[thread_array(ti).error_blk][0]; L.clear_pend(); } @@ -163,11 +163,11 @@ namespace BaskerNS { //Clear workspace, whole column for(Int sb = 0; - sb < LL_size(thread_array[ti].error_blk); + sb < LL_size(thread_array(ti).error_blk); sb++) { BASKER_MATRIX &SL = - LL[thread_array[ti].error_blk][sb]; + LL[thread_array(ti).error_blk][sb]; for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -198,13 +198,13 @@ namespace BaskerNS }//for - sb (subblks) }//if ws is filled - threads_start(ti) = thread_array[ti].error_blk; + threads_start(ti) = thread_array(ti).error_blk; //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; }//if REMALLOC @@ -231,26 +231,26 @@ namespace BaskerNS for(Int ti = 0; ti < num_threads; ti++) { //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { thread_start(ti) = BASKER_MAX_IDX; continue; } - else if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + else if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREAD: " << ti - << " SEPBLK SINGULAR: blk=" << thread_array[ti].error_blk + << " SEPBLK SINGULAR: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; - } else if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + } else if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { if(Options.verbose == BASKER_TRUE) { std::cout << "ERROR THREADS: " << ti - << " SEPBLK NOMALLOC: blk=" << thread_array[ti].error_blk + << " SEPBLK NOMALLOC: blk=" << thread_array(ti).error_blk << std::endl; } return BASKER_ERROR; @@ -260,22 +260,22 @@ namespace BaskerNS Int error_sep_lvl = BASKER_MAX_IDX; for(Int l = 1; l < tree.nlvls+1; l++) { - if(thread_array[ti].error_blk == S[l][ti]) + if(thread_array(ti).error_blk == S(l)(ti)) { error_sep_lvl = l; break; } } - if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - BASKER_ASSERT(thread_array[ti].error_blk >= 0, "nfactor_SEP_error error_blk"); + BASKER_ASSERT(thread_array(ti).error_blk >= 0, "nfactor_SEP_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti - << " SEPBLK MALLOC: blk=" << thread_array[ti].error_blk - << " subblk=" << thread_array[ti].error_subblk - << " newsize=" << thread_array[ti].error_info + << " SEPBLK MALLOC: blk=" << thread_array(ti).error_blk + << " subblk=" << thread_array(ti).error_subblk + << " newsize=" << thread_array(ti).error_info << std::endl; std::cout << " > SEPLVL: " << error_sep_lvl << std::endl; } @@ -283,9 +283,9 @@ namespace BaskerNS //If on diagonal, want to compare L and U Int resize_L = BASKER_MAX_IDX; Int resize_U = BASKER_MAX_IDX; - if(thread_array[ti].error_subblk <= -1) + if(thread_array(ti).error_subblk <= -1) { - resize_L = thread_array[ti].error_info; + resize_L = thread_array(ti).error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ L size: " << resize_L << std::endl; @@ -293,9 +293,9 @@ namespace BaskerNS } //We don't care about the other way since, //L is already checked before U. - if(thread_array[ti].error_subblk > -1) + if(thread_array(ti).error_subblk > -1) { - resize_U = thread_array[ti].error_info; + resize_U = thread_array(ti).error_info; if(Options.verbose == BASKER_TRUE) { std::cout << " ++ U size: " << resize_U << std::endl; @@ -305,9 +305,9 @@ namespace BaskerNS //Resize L, if resize_L != -1 (meaning realloc-L is requested) if(resize_L != BASKER_MAX_IDX) { - const Int tsb = (-1*thread_array[ti].error_subblk)-1; + const Int tsb = (-1*thread_array(ti).error_subblk)-1; BASKER_MATRIX &L = - LL[thread_array[ti].error_blk][tsb]; + LL[thread_array(ti).error_blk][tsb]; REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -322,9 +322,9 @@ namespace BaskerNS //Resize U, if resize_U != -1 (meaning realloc-U is requested) if(resize_U != BASKER_MAX_IDX) { - const Int tsb = thread_array[ti].error_subblk; + const Int tsb = thread_array(ti).error_subblk; BASKER_MATRIX &U = - LU[thread_array[ti].error_blk][tsb]; + LU[thread_array(ti).error_blk][tsb]; REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -346,7 +346,7 @@ namespace BaskerNS //Though this could be done in parallel in the future for(Int p = 0; p < num_threads; p++) { - Int blk = S[0][p]; + Int blk = S(0)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { //Clear workspace, whole column @@ -369,7 +369,7 @@ namespace BaskerNS Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A for(Int p = 0; p < num_threads; p++) { - Int blk = S[error_sep_lvl][p]; + Int blk = S(error_sep_lvl)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { BASKER_MATRIX &TM = LL[blk][0]; @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL[thread_array[ti].error_blk][0]; + LL[thread_array(ti).error_blk][0]; //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -394,12 +394,12 @@ namespace BaskerNS gperm(i) = BASKER_MAX_IDX; }//for--to clear perm - thread_start(ti) = thread_array[ti].error_blk; + thread_start(ti) = thread_array(ti).error_blk; //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; for(Int i = 0; i < num_threads; i++) { @@ -451,9 +451,9 @@ namespace BaskerNS Int btab = btf_tabs_offset; for(Int ti = 0; ti < num_threads; ti++) { - Int c = thread_array[ti].error_blk; + Int c = thread_array(ti).error_blk; //Note: jdb we can make this into a switch - if(thread_array[ti].error_type == BASKER_ERROR_NOERROR) + if(thread_array(ti).error_type == BASKER_ERROR_NOERROR) { if (c >= btab) { thread_start(ti) = BASKER_MAX_IDX; @@ -463,7 +463,7 @@ namespace BaskerNS continue; }//end if NOERROR - if(thread_array[ti].error_type == BASKER_ERROR_SINGULAR) + if(thread_array(ti).error_type == BASKER_ERROR_SINGULAR) { if(Options.verbose == BASKER_TRUE) { @@ -474,7 +474,7 @@ namespace BaskerNS return BASKER_ERROR; }//end if SINGULAR - if(thread_array[ti].error_type == BASKER_ERROR_NOMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_NOMALLOC) { std::cout << "ERROR_THREADS: " << ti << " DIAGBLK NOMALLOC blk=" << c @@ -482,16 +482,16 @@ namespace BaskerNS return BASKER_ERROR; }//end if NOMALLOC - if(thread_array[ti].error_type == BASKER_ERROR_REMALLOC) + if(thread_array(ti).error_type == BASKER_ERROR_REMALLOC) { - Int liwork = thread_array[ti].iws_size*thread_array[ti].iws_mult; - Int lework = thread_array[ti].ews_size*thread_array[ti].ews_mult; + Int liwork = thread_array(ti).iws_size*thread_array(ti).iws_mult; + Int lework = thread_array(ti).ews_size*thread_array(ti).ews_mult; BASKER_ASSERT(c >= 0, "nfactor_diag_error error_blk"); if(Options.verbose == BASKER_TRUE) { std::cout << " > THREADS: " << ti << " DIAGBLK MALLOC blk=" << c - << " newsize=" << thread_array[ti].error_info + << " newsize=" << thread_array(ti).error_info << " for both L( " << c << " ) and U( " << c << " )" << std::endl; @@ -504,11 +504,11 @@ namespace BaskerNS for(Int i = 0; i < liwork; i++) { - thread_array[ti].iws(i) = (Int) 0; + thread_array(ti).iws(i) = (Int) 0; } for(Int i = 0; i < lework; i++) { - thread_array[ti].ews(i) = zero; + thread_array(ti).ews(i) = zero; } //Resize L @@ -516,12 +516,12 @@ namespace BaskerNS L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, - thread_array[ti].error_info); + thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(L.val, L.nnz, - thread_array[ti].error_info); - L.mnnz = thread_array[ti].error_info; - L.nnz = thread_array[ti].error_info; + thread_array(ti).error_info); + L.mnnz = thread_array(ti).error_info; + L.nnz = thread_array(ti).error_info; for(Int i = 0; i < L.ncol; i++) { L.col_ptr(i) = 0; @@ -536,12 +536,12 @@ namespace BaskerNS BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, - thread_array[ti].error_info); + thread_array(ti).error_info); REALLOC_ENTRY_1DARRAY(U.val, U.nnz, - thread_array[ti].error_info); - U.mnnz = thread_array[ti].error_info; - U.nnz = thread_array[ti].error_info; + thread_array(ti).error_info); + U.mnnz = thread_array(ti).error_info; + U.nnz = thread_array(ti).error_info; for(Int i = 0; i < U.ncol; i++) { U.col_ptr(i) = 0; @@ -561,9 +561,9 @@ namespace BaskerNS } //Reset - thread_array[ti].error_type = BASKER_ERROR_NOERROR; - thread_array[ti].error_blk = BASKER_MAX_IDX; - thread_array[ti].error_info = BASKER_MAX_IDX; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_blk = BASKER_MAX_IDX; + thread_array(ti).error_info = BASKER_MAX_IDX; nthread_remalloc++; @@ -593,7 +593,7 @@ namespace BaskerNS { for(Int ti = 0; ti < num_threads; ti++) { - thread_array[ti].error_type = BASKER_ERROR_NOERROR; + thread_array(ti).error_type = BASKER_ERROR_NOERROR; } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 6613d992dbc2..030d526299a1 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -149,7 +149,7 @@ namespace BaskerNS const Mag normA = BTF_A.gnorm; const Mag normA_blk = BTF_A.anorm; - Int b = S[0][kid]; //Which blk from schedule + Int b = S(0)(kid); //Which blk from schedule BASKER_MATRIX &L = LL[b][0]; BASKER_MATRIX &U = LU[b][LU_size(b)-1]; BASKER_MATRIX &M = ALM[b][0]; //A->blk @@ -159,9 +159,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL[b][0].ews; Int ws_size = LL[b][0].iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif //Int bcol = L.scol; //begining col //NOT UD Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -577,11 +577,11 @@ namespace BaskerNS } } if (!explicit_pivot) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; - thread_array[kid].error_info = k; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; + thread_array(kid).error_info = k; return BASKER_ERROR; } } @@ -676,17 +676,17 @@ namespace BaskerNS (int)kid, (long)b, (long)llnnz, (long)lnnz, (long)lcnt, (int)lnnz, (int)M.nrow, (long)newsize); } - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -701,17 +701,17 @@ namespace BaskerNS (int)kid, (long)b, (long)uunnz, (long)unnz+ucnt, (long)k, (int)uunnz, (int)M.nrow, (int)newsize); } - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = -1; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -981,7 +981,7 @@ namespace BaskerNS ) { //Setup variables - const Int wsb = S[0][kid]; + const Int wsb = S(0)(kid); INT_1DARRAY ws = LL[wsb][l].iws; const Int ws_size = LL[wsb][l].iws_size; @@ -1011,11 +1011,11 @@ namespace BaskerNS ) { const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A - const Int b = S[lvl][kid]; + const Int b = S(lvl)(kid); //const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int U_row = LU_size(U_col)-1; if(lvl > 0) { @@ -1128,8 +1128,8 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset @@ -1279,15 +1279,15 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL INT_1DARRAY ws = LL[wsb][l].iws; const Int ws_size = LL[wsb][l].iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1452,17 +1452,17 @@ namespace BaskerNS Int k, Int top, Int xnnz) { - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); BASKER_MATRIX &L = LL[b][0]; #ifdef BASKER_2DL INT_1DARRAY ws = LL[wsb][l].iws; ENTRY_1DARRAY X = LL[wsb][l].ews; Int ws_size = LL[wsb][l].iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif const Entry zero (0.0); @@ -1658,17 +1658,17 @@ namespace BaskerNS (long)blkcol, (long)blkrow, (long)kid, (long)llnnz, (long)lnnz, (long)p_size ); } - thread_array[kid].error_blk = blkcol; - thread_array[kid].error_subblk = blkrow; + thread_array(kid).error_blk = blkcol; + thread_array(kid).error_subblk = blkrow; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } //BASKER_ASSERT(0==1, "REALLOC LOWER BLOCK\n"); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 48dae30f95c9..cf6fd8b3c0d9 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -130,14 +130,14 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_inc_lvl(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; - BASKER_MATRIX &M = ALM[b][0]; //A->blk + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &M = ALM(b)(0); //A->blk - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; Int brow = L.srow; //begining row Int lval = 0; @@ -384,10 +384,10 @@ namespace BaskerNS << pivot << endl; cout << "lcnt: " << lcnt << endl; } - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = b; - thread_array[kid].error_info = k; + thread_array(kid).error_blk = b; + thread_array(kid).error_info = k; return BASKER_ERROR; } @@ -410,17 +410,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = 0; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = 0; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -441,17 +441,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = b; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = b; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -665,9 +665,9 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; INT_1DARRAY ws = LL[wsb][l].iws; @@ -936,12 +936,12 @@ namespace BaskerNS ) { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -985,9 +985,9 @@ namespace BaskerNS //Will want to make this backward in the future //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; INT_1DARRAY ws = LL[wsb][l].iws; @@ -1353,12 +1353,12 @@ namespace BaskerNS //We note that this can be fixed to be faster - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - const Int ws_size = LL[wsb][l].iws_size; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + const Int ws_size = LL(wsb)(l).iws_size; Int brow = L.srow; @@ -1441,12 +1441,12 @@ namespace BaskerNS { //We note that this can be fixed to be faster - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - const Int ws_size = LL[wsb][l].iws_size; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + const Int ws_size = LL(wsb)(l).iws_size; Int brow = L.srow; Int *color = &(ws(0)); @@ -2105,18 +2105,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = blkcol; - thread_array[kid].error_subblk = blkrow; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = blkcol; + thread_array(kid).error_subblk = blkrow; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } @@ -2176,18 +2176,18 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_old(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size[b]-1]; + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size[b]-1); #ifdef BASKER_2DL printf("Accessing blk: %d \n", b); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif Int bcol = L.scol; //begining col @@ -2576,15 +2576,15 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int brow = L.srow; @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM[blkcol][blkrow]; /* @@ -2750,17 +2750,16 @@ namespace BaskerNS BASKER_MATRIX *LPP = &LU[LP_col][0]; if(LP_row != BASKER_MAX_IDX) { - LPP = &(LL[LP_col][LP_row]); + LPP = &(LL(LP_col)(LP_row)); } BASKER_MATRIX &LP = *(LPP); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; - - Int nnz = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; @@ -2948,7 +2947,7 @@ namespace BaskerNS }//over all nonzero in left - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; return; @@ -2969,7 +2968,7 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM[blkcol][blkrow]; INT_1DARRAY ws = LL[X_col][X_row].iws; @@ -3840,9 +3839,9 @@ namespace BaskerNS ) { - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); const Int team_leader = find_leader(kid,sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //Int loop_col_idx = S(l)(kid); //printf("Reduce col fill called, kid: %d leader: %d \n", @@ -3941,12 +3940,12 @@ namespace BaskerNS //printf("===========T ADD ORIG FILL CALLED\n"); const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; //Int X_col = S(0)(leader_id); - Int X_col = S[0][kid]; + Int X_col = S(0)(kid); Int X_row = l+1; @@ -3977,7 +3976,7 @@ namespace BaskerNS //Int L_row = 0; //const Int U_col = S(lvl)(leader_id); //Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; //printf("=***== fill MY ID: %d LEADER ID: %d ===** \n", @@ -3987,7 +3986,7 @@ namespace BaskerNS { Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); /* printf("leader_id: %d kid: %d lvl: %d l: %d blk: %d %d \n", @@ -3996,16 +3995,16 @@ namespace BaskerNS */ Int my_row_leader = find_leader(kid, lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); - Int A_row = (lvl==l)?(2):S[bl][kid]%(LU_size(A_col)); - if((S[bl](kid)>14) && - (S[bl](kid)>LU_size(A_col)) && + Int A_row = (lvl==l)?(2):S(bl)(kid)%(LU_size(A_col)); + if((S(bl)(kid)>14) && + (S(bl)(kid)>LU_size(A_col)) && (lvl != 1)) { - Int tm = (S[bl][kid]+1)/16; - A_row = ((S[bl][kid]+1)-(tm*16))%LU_size(A_col); + Int tm = (S(bl)(kid)+1)/16; + A_row = ((S(bl)(kid)+1)-(tm*16))%LU_size(A_col); } /* diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 9c77c1f38994..68246464f757 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -134,7 +134,7 @@ namespace BaskerNS double barrier_time = 0; #endif - Int U_col = S[lvl][kid]; + Int U_col = S(lvl)(kid); Int U_row = 0; const Int scol = LU[U_col][U_row].scol; @@ -435,8 +435,8 @@ namespace BaskerNS for(Int l = 0; l < lvl; l++) { printf("OPS. KID : %d LVL: %d OPS : %d \n", - kid, l, thread_array[kid].ops_counts[l][0]); - thread_array[kid].ops_count[1][0] = 0; + kid, l, thread_array(kid).ops_counts[l][0]); + thread_array(kid).ops_count[1][0] = 0; } #endif @@ -460,15 +460,15 @@ namespace BaskerNS const Entry zero (0.0); //Get needed variables - const Int L_col = S[l][kid]; - const Int U_col = S[lvl][kid]; + const Int L_col = S(l)(kid); + const Int U_col = S(lvl)(kid); - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); //Int my_new_row = // L_col - my_row_leader; Int U_row = L_col - my_row_leader; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -493,7 +493,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); //printf("Using temp matrix, kid: %d\n", kid); //Bp->print(); } @@ -613,7 +613,7 @@ namespace BaskerNS //Count ops to show imbalance #ifdef BASKER_COUNT_OPS - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif //WE SHOUD DO A UNNZ COUNT @@ -649,17 +649,17 @@ namespace BaskerNS Int newsize = (unnz+U.nrow) * 1.2 ; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; }//if/else realloc } @@ -741,10 +741,10 @@ namespace BaskerNS std::cout << "----Error--- kid = " << kid << ": extra L[" << j << "]=" << X[j] << " with gperm( " << brow_g << " + " << j << " ) = " << t << std::endl; - thread_array[kid].error_type = BASKER_ERROR_OTHER; - thread_array[kid].error_blk = lvl; - thread_array[kid].error_subblk = l; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_OTHER; + thread_array(kid).error_blk = lvl; + thread_array(kid).error_subblk = l; + thread_array(kid).error_info = k; info = BASKER_ERROR; //BASKER_ASSERT(t != BASKER_MAX_IDX, "lower entry in U"); #endif @@ -864,11 +864,11 @@ namespace BaskerNS int lteam_size = pow(2, l); #ifdef BASKER_2DL - Int L_col = S[l][my_leader]; + Int L_col = S(l)(my_leader); Int L_row = 0; - Int U_col = S[lvl][kid]; - Int U_row = (lvl==1)?(kid%2):S[l][kid]%LU_size(U_col); - Int X_col = S[0][my_leader]; + Int U_col = S(lvl)(kid); + Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); + Int X_col = S(0)(my_leader); Int X_row = l; //this will change for us Int col_idx_offset = 0; BASKER_MATRIX &U = LU[U_col][U_row]; @@ -959,12 +959,12 @@ namespace BaskerNS ) { - Int b = S[l][kid]; + Int b = S(l)(kid); BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[team_leader].ews; - Int ws_size = thread_array[kid].iws_size; - Int ews_size = thread_array[team_leader].ews_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(team_leader).ews; + Int ws_size = thread_array(kid).iws_size; + Int ews_size = thread_array(team_leader).ews_size; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid>3) @@ -1066,11 +1066,11 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; //Get needed variables - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = 0; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = LU_size(U_col)-1; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); //Int col_idx_offset = 0; //can we get rid of now? #ifdef BASKER_DEBUG_NFACTOR_COL @@ -1083,7 +1083,7 @@ namespace BaskerNS BASKER_MATRIX &L = LL[L_col][L_row]; BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array[kid].C; + BASKER_MATRIX &B = thread_array(kid).C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -1237,7 +1237,7 @@ namespace BaskerNS #endif #ifdef BASKER_OPS_COUNT - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif t_back_solve(kid, lvl,l+1, k, top, xnnz); // note: l not lvl given @@ -1327,10 +1327,10 @@ namespace BaskerNS X(maxindex) = pivot; } else { // replace-tiny-pivot not requested, or the current column is structurally empty after elimination - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = k; return BASKER_ERROR; } } else if (Options.replace_tiny_pivot && normA_blk > abs(zero) && abs(pivot) < normA_blk * sqrt(eps)) { @@ -1374,17 +1374,17 @@ namespace BaskerNS //cout << " > L_col = " << L_col << " L_row = " << L_row << endl; } - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -1399,17 +1399,17 @@ namespace BaskerNS << endl; } - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -1640,11 +1640,11 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? @@ -1743,15 +1743,15 @@ namespace BaskerNS //Setup - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); - const Int leader_idx = S[0][team_leader]; - Int loop_col_idx = S[l][kid]; + const Int leader_idx = S(0)(team_leader); + Int loop_col_idx = S(l)(kid); #ifdef BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -1909,15 +1909,15 @@ namespace BaskerNS //Setup - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[l+1][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); BASKER_MATRIX &B = AVM[A_col][A_col]; - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); - const Int leader_idx = S[0][team_leader]; - Int loop_col_idx = S[l][kid]; + const Int leader_idx = S(0)(team_leader); + Int loop_col_idx = S(l)(kid); #ifdef BASKER_DEBUG_NFACTOR_COL printf("Called t_blk_col_copy_atomic kid: %d " , kid); @@ -2073,8 +2073,8 @@ namespace BaskerNS //printf("-----------------copy_update_matrx----------"); //printf("\n\n\n\n"); - Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; //COME BACK HERE @@ -2089,8 +2089,8 @@ namespace BaskerNS // for(Int bl = l+1; bl < last_blk; bl++) { Int bl = l+1; - Int A_col = S[lvl][kid]; - Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); + Int A_col = S(lvl)(kid); + Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); Int CM_idx = kid; BASKER_MATRIX *Bp; @@ -2173,8 +2173,8 @@ namespace BaskerNS // l, last_blk, kid); for(Int bl=l+1; bl BB; - BB.Barrier(thread_array[leader_kid].token[sublvl][function_n], - thread_array[leader_kid].token[sublvl][1], + BB.Barrier(thread_array(leader_kid).token[sublvl][function_n], + thread_array(leader_kid).token[sublvl][1], size); */ } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp index 342835bd640c..4389365a84d6 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp @@ -135,7 +135,7 @@ namespace BaskerNS // 3) t_lower_col_factor : factor A(7,7), sequential // 4) t_lower_col_factor_offdiag2 : compute L(8:end, 7) - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = 0; Int ncol = LU[U_col][U_row].ncol; Int my_leader = find_leader(kid, 0); @@ -181,7 +181,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 0, LU[U_col][U_row].scol, 0); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array[tid].error_type != BASKER_SUCCESS) { + if (thread_array(tid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -250,7 +250,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 3, LU[U_col][U_row].scol, 0); for(Int ti = 0; ti < num_threads; ti++) { - if (thread_array[kid].error_type != BASKER_SUCCESS) { + if (thread_array(kid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -336,7 +336,7 @@ namespace BaskerNS t_basker_barrier(thread, kid, my_leader, b_size, 4, k, lvl-1); for(Int tid = 0; tid < num_threads; tid++) { - if (thread_array[tid].error_type != BASKER_SUCCESS) { + if (thread_array(tid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; } } @@ -395,7 +395,7 @@ namespace BaskerNS #ifdef BASKER_TIMER double time_factot = timer.seconds(); if((kid%(Int)(pow(2,lvl))) == 0) { - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = LU_size(U_col)-1; printf("Time Lower-Col(%d): %lf, n = %d, nnz(L) = %d, nnz(U) = %d \n", (int)kid, time_factot, @@ -446,7 +446,7 @@ namespace BaskerNS #endif //This will do the correct spmv - if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_upper_col_factor_offdiag2(kid, lvl, sl,l, k, lower); } //Barrier--Start @@ -461,7 +461,7 @@ namespace BaskerNS //Barrier--End if(kid%((Int)pow(2,sl)) == 0 && - thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_dense_blk_col_copy_atomic2(kid, my_leader, lvl, sl, l, k, lower); } @@ -477,7 +477,7 @@ namespace BaskerNS #endif }//over all sublevels - if(thread_array[kid].error_type == BASKER_ERROR_NOERROR) { + if(thread_array(kid).error_type == BASKER_ERROR_NOERROR) { t_dense_copy_update_matrix2(kid, my_leader, lvl, l, k); } }//end t_add_add @@ -507,10 +507,10 @@ namespace BaskerNS return; } - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; - const Int L_col = S[sl][my_leader]; - const Int U_col = S[lvl][kid]; - const Int X_col = S[0][my_leader]; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + const Int L_col = S(sl)(my_leader); + const Int U_col = S(lvl)(kid); + const Int X_col = S(0)(my_leader); Int L_row = l-sl+1; //Might have to think about th Int U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us @@ -588,10 +588,10 @@ namespace BaskerNS //Setup //printf("DEBUG, kid: %d k: %d A_col: %d A_row: %d \n", // kid, k, A_col, A_row); - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); #ifdef BASKER_DEBUG_NFACTOR_COL2 if(lower == BASKER_TRUE) { @@ -709,8 +709,8 @@ namespace BaskerNS //printf("\n\n\n\n"); const Entry zero (0.0); - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; //Over each blk @@ -724,10 +724,10 @@ namespace BaskerNS // X += B(:, k) { Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); - Int my_row_leader = S[0][find_leader(kid,lvl-1)]; - Int A_row = S[bl][kid] - my_row_leader; + Int my_row_leader = S(0)(find_leader(kid,lvl-1)); + Int A_row = S(bl)(kid) - my_row_leader; BASKER_MATRIX *Bp; if(A_row != (LU_size(A_col)-1)) @@ -875,13 +875,13 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; - const Int U_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); + const Int U_col = S(lvl)(leader_id); Int L_row = 0; Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index ee72c5d32c7b..02fde7c7ccad 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -83,7 +83,7 @@ namespace BaskerNS ) { - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int U_row = 0; //const Int scol = LU(U_col)(U_row).scol; @@ -577,12 +577,12 @@ namespace BaskerNS ) { l = l+1; - Int my_token = S[l][kid]; + Int my_token = S(l)(kid); Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S[l][my_loc] != my_token) + if(S(l)(my_loc) != my_token) { my_loc++; break; @@ -615,14 +615,14 @@ namespace BaskerNS //Get needed variables - const Int L_col = S[l][kid]; + const Int L_col = S(l)(kid); // const Int L_row = 0; //NDE - warning: unused - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); //Int my_new_row = // L_col - S(0)(my_row_leader); - Int U_row = L_col - S[0][my_row_leader]; + Int U_row = L_col - S(0)(my_row_leader); /* Int U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); @@ -642,7 +642,7 @@ namespace BaskerNS //U_row = my_new_row; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); const Int X_row = l; //X_row = lower(L) //const Int col_idx_offset = 0; //we might be able to remove @@ -664,7 +664,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); } BASKER_MATRIX &B = *Bp; //if(kid ==0) @@ -920,18 +920,18 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { //printf("HERE\n"); - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; + thread_array(kid).error_info = newsize; return BASKER_ERROR; }//if/else realloc }//if need to realloc @@ -1086,26 +1086,26 @@ namespace BaskerNS // kid, lvl, sl, l); } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); - if((S[sl][kid] > 14) && - (S[sl][kid] > LU_size(U_col)) && + (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); + if((S(sl)(kid) > 14) && + (S(sl)(kid) > LU_size(U_col)) && (lvl != 1)) { //printf("lower offdiag new num, %d %d \n", // S(sl)(kid), LU_size(U_col)); - Int tm = (S[sl][kid]+1)/16; - U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); + Int tm = (S(sl)(kid)+1)/16; + U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); } //printf("UFF kid:%d U: %d %d new: %d leader: %d %d lvl: %d l: %d sl: %d \n", @@ -1116,7 +1116,7 @@ namespace BaskerNS //JDB PASS TEST U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us //Int col_idx_offset = 0; @@ -1256,18 +1256,18 @@ namespace BaskerNS return; } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); Int U_row = 0; U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us Int col_idx_offset = 0; @@ -1276,11 +1276,11 @@ namespace BaskerNS //Need to give them the output pattern - Int U_pattern_col = S[lvl][kid]; + Int U_pattern_col = S(lvl)(kid); Int my_pattern_leader = find_leader_inc_lvl(kid,l); - Int U_pattern_row = S[l+1][my_pattern_leader] - - S[0][my_row_leader]; + Int U_pattern_row = S(l+1)(my_pattern_leader) - + S(0)(my_row_leader); /* printf("Test mypleader: %d myrowleader: %d kid: %d\n", @@ -1292,7 +1292,7 @@ namespace BaskerNS */ - Int L_pattern_col = S[lvl][kid]; + Int L_pattern_col = S(lvl)(kid); Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -1418,26 +1418,26 @@ namespace BaskerNS return; } - const Int L_col = S[sl][my_leader]; + const Int L_col = S(sl)(my_leader); Int L_row = l-sl+1; //Might have to think about th - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - L_col - S[0][my_row_leader]; + L_col - S(0)(my_row_leader); // Int U_row = my_new_row; Int U_row = - (lvl==1)?(kid%2):S[sl][kid]%LU_size(U_col); - if((S[sl][kid] > 14) && - (S[sl][kid] > LU_size(U_col)) && + (lvl==1)?(kid%2):S(sl)(kid)%LU_size(U_col); + if((S(sl)(kid) > 14) && + (S(sl)(kid) > LU_size(U_col)) && (lvl != 1)) { - Int tm = (S[sl][kid]+1)/16; - U_row = ((S[sl][kid]+1) - (tm*16))%LU_size(U_col); + Int tm = (S(sl)(kid)+1)/16; + U_row = ((S(sl)(kid)+1) - (tm*16))%LU_size(U_col); } // printf("lowerspmv kid: %d U: %d %d new %d leader: %d %d lvl: %d %d %d \n", @@ -1448,7 +1448,7 @@ namespace BaskerNS U_row = my_new_row; - const Int X_col = S[0][my_leader]; + const Int X_col = S(0)(my_leader); Int X_row = l+1; //this will change for us Int col_idx_offset = 0; @@ -1538,8 +1538,8 @@ namespace BaskerNS ) { - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; // Int gbrow = 0; //NDE - warning: unused @@ -1549,11 +1549,11 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); Int A_row = 0; A_row = my_new_row; @@ -1653,8 +1653,8 @@ namespace BaskerNS ) { - const Int leader_idx = S[0][kid]; - BASKER_MATRIX &C = thread_array[kid].C; + const Int leader_idx = S(0)(kid); + BASKER_MATRIX &C = thread_array(kid).C; Int nnz = 0; Int gbrow = 0; @@ -1672,24 +1672,24 @@ namespace BaskerNS { //Copy B -> C Int bl = l+1; - Int A_col = S[lvl][kid]; + Int A_col = S(lvl)(kid); Int my_row_leader = find_leader(kid,lvl-1); Int my_new_row = - S[bl][kid] - S[0][my_row_leader]; + S(bl)(kid) - S(0)(my_row_leader); //Int A_row = my_new_row; - Int A_row = (lvl==1)?(2):S[bl][kid]%(LU_size(A_col)); - if((S[bl][kid] > 14) && - (S[bl][kid] > LU_size(A_col)) && + Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); + if((S(bl)(kid) > 14) && + (S(bl)(kid) > LU_size(A_col)) && (lvl != 1)) { //printf("test cm %d %d %d \n", // kid, S(bl)(kid), LU_size(A_col)); - Int tm = (S[bl][kid]+1)/16; - A_row = ((S[bl][kid]+1) - (tm*16))%LU_size(A_col); + Int tm = (S(bl)(kid)+1)/16; + A_row = ((S(bl)(kid)+1) - (tm*16))%LU_size(A_col); } @@ -1883,12 +1883,12 @@ namespace BaskerNS ) { //Get needed variables - const Int L_col = S[lvl][kid]; + const Int L_col = S(lvl)(kid); const Int L_row = 0; - const Int U_col = S[lvl][kid]; + const Int U_col = S(lvl)(kid); const Int U_row = LU_size(U_col)-1; - const Int X_col = S[0][kid]; + const Int X_col = S(0)(kid); //Int col_idx_offset = 0; //can we get rid of now? @@ -1905,7 +1905,7 @@ namespace BaskerNS BASKER_MATRIX &L = LL[L_col][L_row]; BASKER_MATRIX &U = LU[U_col][U_row]; - BASKER_MATRIX &B = thread_array[kid].C; + BASKER_MATRIX &B = thread_array(kid).C; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid >= 0) @@ -2201,17 +2201,17 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = L_col; - thread_array[kid].error_subblk = -1; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = L_col; + thread_array(kid).error_subblk = -1; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -2229,16 +2229,16 @@ namespace BaskerNS if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; } else { - thread_array[kid].error_type = + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_blk = U_col; - thread_array[kid].error_subblk = U_row; - thread_array[kid].error_info = newsize; + thread_array(kid).error_blk = U_col; + thread_array(kid).error_subblk = U_row; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -2462,11 +2462,11 @@ namespace BaskerNS const Int leader_id = find_leader(kid, l); const Int lteam_size = pow(2,l+1); - const Int L_col = S[lvl][leader_id]; + const Int L_col = S(lvl)(leader_id); Int L_row = 0; - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? @@ -2585,9 +2585,9 @@ namespace BaskerNS //const Int lteam_size = pow(2,l+1); //NDE - warning: unused // const Int L_col = S(lvl)(leader_id); //NDE - warning: unused // Int L_row = 0; //NDE - warning: unused - const Int U_col = S[lvl][leader_id]; + const Int U_col = S(lvl)(leader_id); Int U_row = LU_size(U_col)-1; - Int X_col = S[0][leader_id]; + Int X_col = S(0)(leader_id); Int X_row = l+1; //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused @@ -2621,11 +2621,11 @@ namespace BaskerNS const BASKER_BOOL lower ) { - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //If I an not a leader, then need to copy over if(kid != team_leader) @@ -2682,7 +2682,7 @@ namespace BaskerNS - Int U_pattern_col = S[lvl][kid]; + Int U_pattern_col = S(lvl)(kid); Int U_pattern_row = BASKER_MAX_IDX; if(blk == l+1) @@ -2691,11 +2691,11 @@ namespace BaskerNS //S(0)(find_leader(kid,lvl)); //U_pattern_row = S(l+1)(kid) - //S(0)(my_pattern_leader); - U_pattern_row = S[l+1][kid] - - S[0][find_leader(kid,lvl-1)]; + U_pattern_row = S(l+1)(kid) - + S(0)(find_leader(kid,lvl-1)); } - Int L_pattern_col = S[lvl][kid]; + Int L_pattern_col = S(lvl)(kid); Int L_pattern_row = BASKER_MAX_IDX; if(lower == BASKER_TRUE) { @@ -2769,11 +2769,11 @@ namespace BaskerNS //BASKER_MATRIX &B = AVM(A_col)(A_col); - const Int my_idx = S[0][kid]; + const Int my_idx = S(0)(kid); //should remove either as a paramter or here Int team_leader = find_leader(kid, sl); - const Int leader_idx = S[0][team_leader]; + const Int leader_idx = S(0)(team_leader); //Int loop_col_idx = S(l)(kid); NU //#ifdef BASKER_DEBUG_NFACTOR_COL2 diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index dc59708fe158..b87a0f48eadf 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -294,9 +294,9 @@ namespace BaskerNS printf("Error: NaN diag in single factor\n"); } } - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } @@ -373,9 +373,9 @@ namespace BaskerNS Mag rmin_ (0.0); //workspace - Int ws_size = thread_array[kid].iws_size; - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; + Int ws_size = thread_array(kid).iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -580,9 +580,9 @@ namespace BaskerNS << " Column: " << k << std::endl; } - thread_array[kid].error_type = BASKER_ERROR_NAN; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_NAN; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } absv = abs(value); @@ -714,9 +714,9 @@ namespace BaskerNS pivot = normA_blk * eps; X(maxindex) = pivot; } else { - thread_array[kid].error_type = BASKER_ERROR_SINGULAR; - thread_array[kid].error_blk = c; - thread_array[kid].error_info = k; + thread_array(kid).error_type = BASKER_ERROR_SINGULAR; + thread_array(kid).error_blk = c; + thread_array(kid).error_info = k; return BASKER_ERROR; } } @@ -780,16 +780,16 @@ namespace BaskerNS (long)btf_tabs(c), (long)btf_tabs(c+1), (long)(btf_tabs(c+1)-btf_tabs(c))); } - thread_array[kid].error_blk = c; + thread_array(kid).error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -804,16 +804,16 @@ namespace BaskerNS printf("blk: %ld column: %ld \n", (long)c, (long)k); } - thread_array[kid].error_blk = c; + thread_array(kid).error_blk = c; if(Options.realloc == BASKER_FALSE) { - thread_array[kid].error_type = BASKER_ERROR_NOMALLOC; + thread_array(kid).error_type = BASKER_ERROR_NOMALLOC; return BASKER_ERROR; } else { - thread_array[kid].error_type = BASKER_ERROR_REMALLOC; - thread_array[kid].error_info = newsize; + thread_array(kid).error_type = BASKER_ERROR_REMALLOC; + thread_array(kid).error_info = newsize; return BASKER_ERROR; } } @@ -991,8 +991,8 @@ namespace BaskerNS ) { //printf("=======LOCAL REACH BTF SHORT CALLED (pattern[top=%d - 1] = %d) =====\n",(int)top, (int)j); - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,8 +1014,8 @@ namespace BaskerNS { //printf("=======LOCAL REACH BTF CALLED =====\n"); - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; /*{ printf("ws_size: %d \n", ws_size); @@ -1144,8 +1144,8 @@ namespace BaskerNS ) { - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; /* printf("ws_size: %d \n", ws_size); @@ -1289,9 +1289,9 @@ namespace BaskerNS { const Entry zero (0.0); - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; Int brow = L.srow; Int *color = &(ws(0)); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index d48f0e720a7e..ef9bdb8084ef 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -285,8 +285,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.ncol > 0, "Basker symmetric_sfactor assert: A.ncol malloc > 0 failed"); - MALLOC_INT_1DARRAY(gScol[ii], A.ncol); - init_value(gScol[ii], A.ncol, (Int)0); + MALLOC_INT_1DARRAY(gScol(ii), A.ncol); + init_value(gScol(ii), A.ncol, (Int)0); } @@ -298,8 +298,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.nrow > 0, "sfactor A.nrow malloc"); - MALLOC_INT_1DARRAY(gSrow[ii], A.nrow); - init_value(gSrow[ii], A.nrow, (Int)0); + MALLOC_INT_1DARRAY(gSrow(ii), A.nrow); + init_value(gSrow(ii), A.nrow, (Int)0); } #ifdef BASKER_TIMER @@ -334,7 +334,7 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) #endif { - Int blk = S[0][p]; + Int blk = S(0)(p); if(Options.verbose == BASKER_TRUE) { printf(" ============= DOMAIN BLK (p=%d) ============\n",(int)p); @@ -409,18 +409,18 @@ int Basker::sfactor() for(Int p = 0; p < num_threads; ++p) { //Do off diag - Int blk = S[0][p]; + Int blk = S(0)(p); #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; #endif for(Int l =0; l < tree.nlvls; l++) { - Int U_col = S[l+1][p]; + Int U_col = S(l+1)(p); //Note: Need to think more about this flow //Should be subtracted by how many times in the //future - Int my_row_leader = S[0][find_leader(p,l)]; + Int my_row_leader = S(0)(find_leader(p,l)); //Int my_new_row = // blk - my_row_leader; Int U_row = blk-my_row_leader; @@ -436,16 +436,16 @@ int Basker::sfactor() Int off_diag = 1; //printf( " U_blk_sfactor(AVM(%d,%d))\n",U_col,U_row ); //U_blk_sfactor(AV[U_col][U_row], stree, - // gScol[l], gSrow[glvl],0); + // gScol(l), gSrow(glvl),0); #ifdef BASKER_TIMER timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM[U_col][U_row], stree_p, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #else U_blk_sfactor(AVM[U_col][U_row], stree, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER time3 += timer1.seconds(); @@ -533,11 +533,11 @@ int Basker::sfactor() (long)U_col, (long)U_row, (long)lvl, (long)pp); #endif - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, - //gScol[lvl], gSrow[pp]); + //gScol(lvl), gSrow(pp)); #ifdef BASKER_TIMER printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); @@ -545,10 +545,10 @@ int Basker::sfactor() #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; S_blk_sfactor(ALM[U_col][U_row], stree_p, - gScol[lvl], gSrow[pp]); + gScol(lvl), gSrow(pp)); #else S_blk_sfactor(ALM[U_col][U_row], stree, - gScol[lvl], gSrow[pp]); + gScol(lvl), gSrow(pp)); #endif #ifdef BASKER_TIMER printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); @@ -592,20 +592,20 @@ int Basker::sfactor() Int ppp; ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); - U_col = S[l+1][ppp]; - U_row = S[lvl+1][ppp]%LU_size(U_col); + U_col = S(l+1)(ppp); + U_row = S(lvl+1)(ppp)%LU_size(U_col); - Int my_row_leader = S[0][find_leader(ppp,l)]; + Int my_row_leader = S(0)(find_leader(ppp,l)); //Int my_new_row = // S(lvl+1)(ppp) - my_row_leader; - U_row = S[lvl+1][ppp] - my_row_leader; + U_row = S(lvl+1)(ppp) - my_row_leader; #ifdef BASKER_DEBUG_SFACTOR printf("offida sep, lvl: %d l: %d U_col: %d U_row: %d \n", lvl, l, U_col, U_row); @@ -615,10 +615,10 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM[U_col][U_row], stree_p, - gScol[l], gSrow[pp], off_diag); + gScol(l), gSrow(pp), off_diag); #else U_blk_sfactor(AVM[U_col][U_row], stree, - gScol[l], gSrow[pp], off_diag); + gScol(l), gSrow(pp), off_diag); #endif //In symmetric will not need @@ -656,8 +656,8 @@ int Basker::sfactor() for(Int ii = 0 ; ii < split_num; ++ii) { //printf("split\n"); - FREE(gScol[ii]); - FREE(gSrow[ii]); + FREE(gScol(ii)); + FREE(gSrow(ii)); } FREE(gScol); FREE(gSrow); @@ -2589,22 +2589,22 @@ int Basker::sfactor() for(Int i = 0 ; i < num_threads; i++) { - thread_array[i].iws_size = max_blk_size; - thread_array[i].ews_size = max_blk_size; + thread_array(i).iws_size = max_blk_size; + thread_array(i).ews_size = max_blk_size; //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); #ifdef BASKER_TIMER printf("Malloc Thread: %d iws: %d \n", - i, (thread_array[i].iws_size* - thread_array[i].iws_mult)); + i, (thread_array(i).iws_size* + thread_array(i).iws_mult)); printf("Malloc Thread: %d ews: %d \n", - i, (thread_array[i].ews_size* - thread_array[i].ews_mult)); + i, (thread_array(i).ews_size* + thread_array(i).ews_mult)); #endif if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array[i].iws, thread_array[i].iws_size*thread_array[i].iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array[i].ews, thread_array[i].ews_size*thread_array[i].ews_mult); + MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); } } } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index ec7774a43f13..890bc8a17fca 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -99,20 +99,20 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { - Int blk = S[0][p]; + Int blk = S(0)(p); sfactor_nd_dom_estimate(ALM[blk][0], LL[blk][0], LU[blk][LU_size(blk)-1]); for(Int l=0; l < tree.nlvls; l++) { - Int U_col = S[l+1][p]; + Int U_col = S(l+1)(p); Int my_row_leader = find_leader(p,l); Int my_new_row = - blk - S[0][my_row_leader]; + blk - S(0)(my_row_leader); - Int U_row = (l==0)?(p%2):S[0][p]%LU_size(U_col); + Int U_row = (l==0)?(p%2):S(0)(p)%LU_size(U_col); if((blk > 14) && (blk > LU_size(U_col)) && (l!=0)) @@ -138,7 +138,7 @@ namespace BaskerNS for(Int pp=0; pp < pow(tree.nparts, tree.nlvls-lvl-1); pp++) { Int ppp = pp*pow(tree.nparts, lvl+1); - Int U_col = S[lvl+1][ppp]; + Int U_col = S(lvl+1)(ppp); Int U_row = 0; sfactor_nd_sep_estimate(ALM[U_col][U_row], @@ -148,19 +148,19 @@ namespace BaskerNS Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { - U_col = S[l+1][ppp]; + U_col = S(l+1)(ppp); Int my_row_leader = find_leader(ppp,l); Int my_new_row = - S[lvl+1][ppp] - S[0][my_row_leader]; + S(lvl+1)(ppp) - S(0)(my_row_leader); - U_row = S[lvl+1][ppp]%LU_size(U_col); - if((S[lvl+1][ppp] > 14) && - (S[lvl+1][ppp] > LU_size(U_col)) + U_row = S(lvl+1)(ppp)%LU_size(U_col); + if((S(lvl+1)(ppp) > 14) && + (S(lvl+1)(ppp) > LU_size(U_col)) ) { - Int tm = (S[lvl+1][ppp]+1)/16; - U_row = ((S[lvl+1][ppp]+1) - + Int tm = (S(lvl+1)(ppp)+1)/16; + U_row = ((S(lvl+1)(ppp)+1) - (tm*16))%LU_size(U_col); } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index 81e3c78c7f9c..74a478591636 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -118,7 +118,7 @@ namespace BaskerNS for(Int i =0; i < tree.nblks+1; i++) { BASKER_ASSERT(num_threads > 0, "tree num_threads"); - MALLOC_INT_1DARRAY(S[i], num_threads); + MALLOC_INT_1DARRAY(S(i), num_threads); } //this will want to be across all threads @@ -335,7 +335,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -356,7 +356,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -368,11 +368,11 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -592,7 +592,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -611,7 +611,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -624,10 +624,10 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index c55c222ec7c7..193ecb11e24a 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -148,13 +148,13 @@ enum BASKER_INCOMPLETE_CODE #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY Kokkos::View -#define INT_2DARRAY std::vector -#define ENTRY_2DARRAY std::vector -#define MATRIX_1DARRAY std::vector -#define MATRIX_2DARRAY std::vector -#define MATRIX_VIEW_1DARRAY std::vector -#define MATRIX_VIEW_2DARRAY std::vector -#define THREAD_1DARRAY std::vector +#define INT_2DARRAY Kokkos::View +#define ENTRY_2DARRAY Kokkos::View +#define MATRIX_1DARRAY Kokkos::View +#define MATRIX_2DARRAY Kokkos::View +#define MATRIX_VIEW_1DARRAY Kokkos::View +#define MATRIX_VIEW_2DARRAY Kokkos::View +#define THREAD_1DARRAY Kokkos::View #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +163,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -192,8 +191,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s);*/ \ - a.resize(s); \ + a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -202,7 +200,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -212,8 +209,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = ENTRY_2DARRAY("entry_2d",s);*/ \ - a.resize(s); \ + a = ENTRY_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -222,7 +218,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -232,7 +227,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = BOOL_2DARRAY("bool_2d", s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -242,8 +236,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_1DARRAY("matrix_1d",s)*/ \ - a.resize(s); \ + a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ + Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,8 +246,8 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_2DARRAY("matrix_2d",s);*/ \ - a.resize(s); \ + a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ + Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -262,8 +256,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s);*/ \ - a.resize(s); \ + a = MATRIX_VIEW_1DARRAY(Kokkos::view_alloc("matrix_view_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -272,8 +265,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s);*/ \ - a.resize(s); \ + a = MATRIX_VIEW_2DARRAY(Kokkos::view_alloc("matrix_view_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -282,26 +274,12 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = THREAD_1DARRAY("thread_1d",s);*/ \ - a.resize(s); \ + a = THREAD_1DARRAY(Kokkos::view_alloc("thread_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ } -//RESIZE (with copy) -#define RESIZE_1DARRAY(a,os,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT RESIZE 1D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s); \ - } -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) \ - { \ - BASKER_ASSERT(s1 >= 0 && s2 >= 0, "BASKER ASSERT RESIZE 2D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s1,s2); \ - } -#define RESIZE_INT_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) -#define RESIZE_ENTRY_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) //REALLOC (no copy) #define REALLOC_1DARRAY(a,os,s) \ { \ @@ -315,6 +293,7 @@ enum BASKER_INCOMPLETE_CODE } #define REALLOC_INT_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) #define REALLOC_ENTRY_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) + //Set values #define SET_INT_1DARRAY(a, b, s) \ { \ @@ -341,80 +320,67 @@ enum BASKER_INCOMPLETE_CODE #define FREE_INT_1DARRAY_PAIRS(a) \ { \ - /*a = INT_1DARRAY_PAIRS();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_1DARRAY(a) \ { \ - /*a = INT_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_RANK2DARRAY(a) \ { \ - /*a = INT_RANK2DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_INT_2DARRAY(a,n) \ { \ - /*a = INT_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_ENTRY_1DARRAY(a) \ { \ - /*a = ENTRY_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_ENTRY_2DARRAY(a,n) \ { \ - /*a = ENTRY_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_BOOL_1DARRAY(a) \ { \ - /*a = BOOL_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_BOOL_2DARRAY(a,n) \ { \ - /*a = BOOL_2DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_MATRIX_1DARRAY(a) \ { \ - /*a = MATRIX_1DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_2DARRAY(a,n) \ { \ - /*a = MATRIX_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ { \ - /*a = MATRIX_VIEW_1DARRAY();*/ \ Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_2DARRAY(a,n) \ { \ - /*a = MATRIX_VIEW_2DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #define FREE_THREAD_1DARRAY(a) \ { \ - /*a = THREAD_1DARRAY();*/ \ - a.resize(0); \ + Kokkos::resize(a,0); \ } #else // not BASKER_KOKKOS @@ -456,11 +422,6 @@ enum BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -//Resize (copy old data) (come back and add) -#define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP -#define RESIZE_INT_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_ENTRY_1DARRAY(a,os,s) BASKER_NO_OP //Realloc (dont copy old data) #define REALLOC_1DARRAY(a,os,s) BASKER_NO_OP #define REALLOC_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 3691d73c63ba..715ac1c13f5f 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -358,7 +358,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -378,7 +378,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -402,13 +402,13 @@ namespace BaskerNS LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S[0][my_row_leader]; + b - S(0)(my_row_leader); - Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); //JDB TEST PASS U_row = my_new_row; @@ -460,7 +460,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -524,7 +524,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", @@ -550,13 +550,13 @@ namespace BaskerNS for(Int l = lvl+1; l < tree.nlvls+1; l++) { - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int my_row_leader = find_leader(kid, l-1); Int my_new_row = - b - S[0][my_row_leader]; + b - S(0)(my_row_leader); - Int U_row = (l==1)?(kid%2):S[lvl][kid]%LU_size(U_col); + Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); if( (b > 14) && // NDE: Why is 14 specifically used here? (b > LU_size(U_col)) && @@ -640,7 +640,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int row = 0; row < LL_size(b); row++) { @@ -692,7 +692,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", @@ -731,10 +731,10 @@ namespace BaskerNS //TEST Int my_leader = find_leader(kid,l-1); - Int my_leader_row = S[0][my_leader]; + Int my_leader_row = S(0)(my_leader); //Int my_col_size = pow(2,l); Not used Int my_new_row = - (S[lvl][kid] - my_leader_row); + (S(lvl)(kid) - my_leader_row); //my_new_row = my_new_row%my_col_size; /* @@ -745,7 +745,7 @@ namespace BaskerNS my_col_size, my_new_row); */ - Int U_col = S[l][kid]; + Int U_col = S(l)(kid); Int U_row = my_new_row; //Int U_row = (l==1)?(kid%2):S(lvl)(kid)%LU_size(U_col); @@ -823,7 +823,7 @@ namespace BaskerNS { if(kid%((Int)pow(2,lvl)) == 0) { - Int b = S[lvl][kid]; + Int b = S(lvl)(kid); for(Int l = 0; l < LL_size(b); l++) { @@ -885,7 +885,7 @@ namespace BaskerNS //printf("C: size: %d kid: %d \n", // iws_size, kid); - //thread_array[kid].C.init_matrix("cwork", + //thread_array(kid).C.init_matrix("cwork", // 0, iws_size, // 0, 2, // iws_size*2); @@ -895,7 +895,7 @@ namespace BaskerNS } //Also workspace matrix //This could be made smaller - thread_array[kid].C.init_matrix("cwork", 0, max_sep_size, + thread_array(kid).C.init_matrix("cwork", 0, max_sep_size, 0, 2, max_sep_size*2); } //end if btf_tabs_offset != 0 @@ -905,19 +905,19 @@ namespace BaskerNS { // if any left over for BLK factorization if(Options.btf == BASKER_TRUE) { - Int iws_mult = thread_array[kid].iws_mult; - Int iws_size = thread_array[kid].iws_size; - Int ews_mult = thread_array[kid].ews_mult; - Int ews_size = thread_array[kid].ews_size; + Int iws_mult = thread_array(kid).iws_mult; + Int iws_size = thread_array(kid).iws_size; + Int ews_mult = thread_array(kid).ews_mult; + Int ews_size = thread_array(kid).ews_size; for(Int i=0; i < iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0.0; + thread_array(kid).ews[i] = 0.0; } } } @@ -928,23 +928,23 @@ namespace BaskerNS { if(btf_tabs_offset != 0) { - INT_1DARRAY &ws = thread_array[kid].iws; - ENTRY_1DARRAY &X = thread_array[kid].ews; - Int iws_size = thread_array[kid].iws_size; - Int iws_mult = thread_array[kid].iws_mult; - Int ews_size = thread_array[kid].ews_size; - Int ews_mult = thread_array[kid].ews_mult; + INT_1DARRAY &ws = thread_array(kid).iws; + ENTRY_1DARRAY &X = thread_array(kid).ews; + Int iws_size = thread_array(kid).iws_size; + Int iws_mult = thread_array(kid).iws_mult; + Int ews_size = thread_array(kid).ews_size; + Int ews_mult = thread_array(kid).ews_mult; } } printf("init_workspace 1d, kid: %d size: %d %d %d %d \n", kid, iws_mult, iws_size, ews_mult, ews_size); for(Int i=0; i< iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0; + thread_array(kid).ews[i] = 0; } #endif //endif def basker_2dl //return 0; @@ -2467,12 +2467,12 @@ namespace BaskerNS Int Basker::find_leader(Int kid, Int l) { l = l+1; - Int my_token = S[l][kid]; + Int my_token = S(l)(kid); Int my_loc = kid; while((my_loc > 0)) { my_loc--; - if(S[l][my_loc] != my_token) + if(S(l)(my_loc) != my_token) { my_loc++; break; From da3a195e7b4c9753da8628a830fcd57617b975f9 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 25 Oct 2024 20:27:04 -0600 Subject: [PATCH 4/5] ShyLU - Basker : replace brackes back to parenthesis Signed-off-by: iyamazaki --- .../basker/src/shylubasker_error_manager.hpp | 26 +-- .../basker/src/shylubasker_nfactor_blk.hpp | 94 ++++---- .../src/shylubasker_nfactor_blk_inc.hpp | 144 ++++++------ .../basker/src/shylubasker_nfactor_col.hpp | 180 +++++++-------- .../basker/src/shylubasker_nfactor_col2.hpp | 40 ++-- .../src/shylubasker_nfactor_col_inc.hpp | 110 ++++----- .../basker/src/shylubasker_nfactor_diag.hpp | 8 +- .../basker/src/shylubasker_sfactor.hpp | 86 +++---- .../basker/src/shylubasker_sfactor_inc.hpp | 28 +-- .../basker/src/shylubasker_solve_rhs.hpp | 16 +- .../basker/src/shylubasker_solve_rhs_tr.hpp | 16 +- .../basker/src/shylubasker_stats.hpp | 10 +- .../basker/src/shylubasker_tree.hpp | 28 +-- .../basker/src/shylubasker_util.hpp | 214 +++++++++--------- 14 files changed, 490 insertions(+), 510 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index d9695c6e5c78..cd2c9f57bf0a 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -94,8 +94,8 @@ namespace BaskerNS { Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; - if(LL[blkcol][0].nnz >= - LU[blkcol][blkUrow].nnz) + if(LL(blkcol)(0).nnz >= + LU(blkcol)(blkUrow).nnz) { resize_U = thread_array(ti).error_info; } @@ -116,7 +116,7 @@ namespace BaskerNS std::cout << " ++ resize L( tid = " << ti << " ): new size = " << resize_L << std::endl; } BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][thread_array(ti).error_subblk]; + LL(thread_array(ti).error_blk)(thread_array(ti).error_subblk); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -142,7 +142,7 @@ namespace BaskerNS std::cout << " ++ resize U( tid = " << ti << " ): new size = " << resize_U << std::endl; } BASKER_MATRIX &U = - LU[thread_array(ti).error_blk][0]; + LU(thread_array(ti).error_blk)(0); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -153,7 +153,7 @@ namespace BaskerNS U.nnz = resize_U; //Still need to clear pend BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][0]; + LL(thread_array(ti).error_blk)(0); L.clear_pend(); } @@ -167,7 +167,7 @@ namespace BaskerNS sb++) { BASKER_MATRIX &SL = - LL[thread_array(ti).error_blk][sb]; + LL(thread_array(ti).error_blk)(sb); for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -307,7 +307,7 @@ namespace BaskerNS { const Int tsb = (-1*thread_array(ti).error_subblk)-1; BASKER_MATRIX &L = - LL[thread_array(ti).error_blk][tsb]; + LL(thread_array(ti).error_blk)(tsb); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, resize_L); @@ -324,7 +324,7 @@ namespace BaskerNS { const Int tsb = thread_array(ti).error_subblk; BASKER_MATRIX &U = - LU[thread_array(ti).error_blk][tsb]; + LU(thread_array(ti).error_blk)(tsb); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, resize_U); @@ -352,7 +352,7 @@ namespace BaskerNS //Clear workspace, whole column for(Int sb = 0; sb < LL_size(blk); sb++) { - BASKER_MATRIX &SL = LL[blk][sb]; + BASKER_MATRIX &SL = LL(blk)(sb); for(Int i = 0; i < SL.iws_size*SL.iws_mult; ++i) { SL.iws(i) = (Int) 0; @@ -372,7 +372,7 @@ namespace BaskerNS Int blk = S(error_sep_lvl)(p); //if(LL(blk)(0).w_fill == BASKER_TRUE) { - BASKER_MATRIX &TM = LL[blk][0]; + BASKER_MATRIX &TM = LL(blk)(0); //printf( " > p=%d: scol_top = %d, scol = %d, ncol = %d\n",p,scol_top,TM.scol,TM.ncol ); for(Int i = scol_top + TM.scol; i < scol_top + (TM.scol+TM.ncol); i++) { @@ -386,7 +386,7 @@ namespace BaskerNS //Note, will have to clear the perm in all sep blk in that level //Clear permuation BASKER_MATRIX &SL = - LL[thread_array(ti).error_blk][0]; + LL(thread_array(ti).error_blk)(0); //printf( " + scol_top = %d, srow = %d, nrowl = %d\n",scol_top,SL.srow,SL.nrow ); for(Int i = scol_top + SL.srow; i < scol_top + (SL.srow+SL.nrow); i++) { @@ -512,7 +512,7 @@ namespace BaskerNS } //Resize L - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); L.clear_pend(); REALLOC_INT_1DARRAY(L.row_idx, L.nnz, @@ -533,7 +533,7 @@ namespace BaskerNS } //Resize U - BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); REALLOC_INT_1DARRAY(U.row_idx, U.nnz, thread_array(ti).error_info); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 030d526299a1..2e0434796e33 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -150,14 +150,14 @@ namespace BaskerNS const Mag normA_blk = BTF_A.anorm; Int b = S(0)(kid); //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; - BASKER_MATRIX &M = ALM[b][0]; //A->blk + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); + BASKER_MATRIX &M = ALM(b)(0); //A->blk #ifdef BASKER_2DL //printf("Accessing blk: %d kid: %d \n", b, kid); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(kid).ews; @@ -983,8 +983,8 @@ namespace BaskerNS //Setup variables const Int wsb = S(0)(kid); - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1014,7 +1014,7 @@ namespace BaskerNS const Int b = S(lvl)(kid); //const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); const Int U_col = S(lvl)(kid); Int U_row = LU_size(U_col)-1; if(lvl > 0) @@ -1022,7 +1022,7 @@ namespace BaskerNS //U_row = (lvl==1)?(kid%2):S(l)(kid)%LU_size(U_col); } - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int brow = L.srow; @@ -1130,12 +1130,12 @@ namespace BaskerNS //Setup variables const Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_g = L.srow + scol_top; // global offset - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -1281,10 +1281,10 @@ namespace BaskerNS //Setup variables const Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; #else INT_1DARRAY ws = thread_array(kid).iws; Int ws_size = thread_array(kid).iws_size; @@ -1454,11 +1454,11 @@ namespace BaskerNS { const Int b = S(lvl)(kid); const Int wsb = S(0)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - ENTRY_1DARRAY X = LL[wsb][l].ews; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + ENTRY_1DARRAY X = LL(wsb)(l).ews; + Int ws_size = LL(wsb)(l).iws_size; #else INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(kid).ews; @@ -1534,10 +1534,10 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; //const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; @@ -1607,8 +1607,8 @@ namespace BaskerNS if (blkcol == 2 && blkrow == 1) printf( " L.colptr(%d) = %d\n",k+1,lnnz ); #endif - //LL[X_col][X_row].p_size = 0; - LL[X_col][X_row].p_size = 0; + //LL(X_col)(X_row).p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_dense_offdiag_mov_L() @@ -1623,12 +1623,12 @@ namespace BaskerNS Int X_col, Int X_row, Int k, Entry pivot) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; - const Int p_size = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; + const Int p_size = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -1714,7 +1714,7 @@ namespace BaskerNS } #endif - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_offdiag_mov_L() @@ -1733,17 +1733,17 @@ namespace BaskerNS BASKER_BOOL A_option) { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //printf( " t_dense_back_solve_offdiag( LL(%d,%d) and ALM(%d,%d)\n", blkcol,blkrow,blkcol,blkrow ); #ifdef BASKER_DEBUG_NFACTOR_BLK - Int ws_size = LL[X_col][X_row].iws_size; + Int ws_size = LL(X_col)(X_row).iws_size; const Int brow = L.srow; const Int bcol = L.scol; printf("\n\n"); @@ -1831,8 +1831,8 @@ namespace BaskerNS }//over all nonzero in left #ifdef BASKER_2DL - //LL[X_col][X_row].p_size = nnz; - LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug @@ -1878,14 +1878,14 @@ namespace BaskerNS { //Note: need to add support for offdiag permuation - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL[X_col][X_row].iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -2056,8 +2056,8 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - //LL[X_col][X_row].p_size = nnz; - LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index cf6fd8b3c0d9..c9e696f50786 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -670,8 +670,8 @@ namespace BaskerNS BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; //Int *color = &(ws[0]); Int *pattern = &(ws(ws_size)); @@ -990,8 +990,8 @@ namespace BaskerNS BASKER_MATRIX &L = LL(b)(0); const Int brow = L.srow; - INT_1DARRAY ws = LL[wsb][l].iws; - const Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + const Int ws_size = LL(wsb)(l).iws_size; Int *color = &(ws(0)); Int *pattern = &(ws(ws_size)); @@ -1555,14 +1555,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK printf("t_back_solve_diag, kid: %d blkcol: %d blkrow: %d \n", @@ -1696,7 +1696,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return; @@ -1717,14 +1717,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //Int brow = L.srow; //Int bcol = L.scol; @@ -1846,7 +1846,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -1869,14 +1869,14 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; Int brow = L.srow; Int bcol = L.scol; @@ -2046,7 +2046,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -2065,12 +2065,12 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; - const Int p_size = LL[X_col][X_row].p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; + const Int p_size = LL(X_col)(X_row).p_size; #ifdef BASKER_DEBUG_NFACTOR_BLK @@ -2155,14 +2155,14 @@ namespace BaskerNS //Fix later if(Options.same_pattern == BASKER_FALSE) { - for(Int i = 0; i < LL[X_col][X_row].nrow; i++) + for(Int i = 0; i < LL(X_col)(X_row).nrow; i++) { stack[i] = BASKER_MAX_IDX; } } L.col_ptr(k+1) = lnnz; - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_offdiag_mov_L_inc_lvl() @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); /* @@ -2740,14 +2740,14 @@ namespace BaskerNS LP_col, LP_row, kid); */ - BASKER_MATRIX *UPP = &LU[UP_col][0]; + BASKER_MATRIX *UPP = &LU(UP_col)(0); if(UP_row != BASKER_MAX_IDX) { - UPP = &(LU[UP_col][UP_row]); + UPP = &(LU(UP_col)(UP_row)); } BASKER_MATRIX &UP = *(UPP); - BASKER_MATRIX *LPP = &LU[LP_col][0]; + BASKER_MATRIX *LPP = &LU(LP_col)(0); if(LP_row != BASKER_MAX_IDX) { LPP = &(LL(LP_col)(LP_row)); @@ -2968,14 +2968,14 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; - Int nnz = LL[X_col][X_row].p_size; + Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //const Int bcol = L.scol; @@ -3105,7 +3105,7 @@ namespace BaskerNS */ - Int temp = INC_LVL_TEMP(k_i+LL[blkcol][0].srow) + L.inc_lvl(j) + 1; + Int temp = INC_LVL_TEMP(k_i+LL(blkcol)(0).srow) + L.inc_lvl(j) + 1; /* printf("lower row: %d kid: %d inc: %d %d %d j: %d \n", @@ -3182,7 +3182,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif //Debug @@ -3218,11 +3218,11 @@ namespace BaskerNS Int k, Entry pivot ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - const Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + const Int ws_size = LL(X_col)(X_row).iws_size; //const Int p_size = LL(X_col)(X_row).p_size; //NDE - warning: unused @@ -3295,7 +3295,7 @@ namespace BaskerNS } L.col_ptr(k+1) = lnnz; - LL[X_col][X_row].p_size = 0; + LL(X_col)(X_row).p_size = 0; return 0; }//end t_dense_offdiag_mov_L_inv_lvl() @@ -3314,12 +3314,12 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; - BASKER_MATRIX &B = ALM[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //Int brow = L.srow; @@ -3438,11 +3438,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3575,11 +3575,11 @@ namespace BaskerNS Int x_size, Int x_offset ) { - BASKER_MATRIX &L = LL[blkcol][blkrow]; + BASKER_MATRIX &L = LL(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; - Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; //Int nnz = LL(X_col)(X_row).p_size; //const Int brow = L.srow; //Not used @@ -3757,16 +3757,16 @@ namespace BaskerNS BASKER_MATRIX *B; if(lower == BASKER_TRUE) { - B = &(ALM[blkcol][blkrow]); + B = &(ALM(blkcol)(blkrow)); } else { - B = &(AVM[blkcol][blkrow]); + B = &(AVM(blkcol)(blkrow)); } BASKER_MATRIX &M = *B; //BASKER_MATRIX &M = ALM(blkcol)(blkrow); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -3856,12 +3856,12 @@ namespace BaskerNS for(Int blk = l+1; blk < endblk; ++blk) { // ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &wsL = LL[leader_idx][blk].iws; + INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //Int p_sizeL = LL(leader_idx)(blk).p_size; - Int ws_sizeL = LL[leader_idx][blk].iws_size; + Int ws_sizeL = LL(leader_idx)(blk).iws_size; // ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; //NDE - warning: unused - INT_1DARRAY &ws = LL[my_idx][blk].iws; - const Int ws_size = LL[my_idx][blk].iws_size; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; + const Int ws_size = LL(my_idx)(blk).iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -3874,7 +3874,7 @@ namespace BaskerNS Int *stackL = &(patternL[ws_sizeL]); //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { //if(kid==3) // { diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 68246464f757..289ee65f7ccd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -137,9 +137,9 @@ namespace BaskerNS Int U_col = S(lvl)(kid); Int U_row = 0; - const Int scol = LU[U_col][U_row].scol; - const Int ecol = LU[U_col][U_row].ecol; - const Int ncol = LU[U_col][U_row].ncol; + const Int scol = LU(U_col)(U_row).scol; + const Int ecol = LU(U_col)(U_row).ecol; + const Int ncol = LU(U_col)(U_row).ncol; //for(Int k = scol; k < ecol; k++) //might have to use k+scol for barrier @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -488,7 +488,7 @@ namespace BaskerNS //if(sep_flg == BASKER_FALSE) if(l == 0) { - Bp = &(AVM[U_col][U_row]); + Bp = &(AVM(U_col)(U_row)); //bbcol = Bp->scol; } else @@ -503,9 +503,9 @@ namespace BaskerNS // kid, X_col, X_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -871,16 +871,16 @@ namespace BaskerNS Int X_col = S(0)(my_leader); Int X_row = l; //this will change for us Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); const Int bcol = U.scol; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only work with with 2D layout"); #endif #ifdef BASKER_2DL - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only works with 2D layout"); #endif @@ -960,7 +960,7 @@ namespace BaskerNS { Int b = S(l)(kid); - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); INT_1DARRAY ws = thread_array(kid).iws; ENTRY_1DARRAY X = thread_array(team_leader).ews; Int ws_size = thread_array(kid).iws_size; @@ -1080,8 +1080,8 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); BASKER_MATRIX &B = thread_array(kid).C; @@ -1098,9 +1098,9 @@ namespace BaskerNS //B.print(); - INT_1DARRAY ws = LL[X_col][l+1].iws; - const Int ws_size = LL[X_col][l+1].iws_size; - ENTRY_1DARRAY X = LL[X_col][l+1].ews; + INT_1DARRAY ws = LL(X_col)(l+1).iws; + const Int ws_size = LL(X_col)(l+1).iws_size; + ENTRY_1DARRAY X = LL(X_col)(l+1).ews; Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A const Int brow_a = U.srow; // offset within A @@ -1648,12 +1648,12 @@ namespace BaskerNS Int X_row = l+1; Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; //U.fill(); + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //U.fill(); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int bcol = U.scol; @@ -1746,7 +1746,7 @@ namespace BaskerNS Int A_col = S(lvl)(kid); Int A_row = (lvl==1)?(2):S(l+1)(kid)%(LU_size(A_col)); - BASKER_MATRIX &B = AVM[A_col][A_col]; + BASKER_MATRIX &B = AVM(A_col)(A_col); const Int my_idx = S(0)(kid); team_leader = find_leader(kid, l); @@ -1769,17 +1769,17 @@ namespace BaskerNS //Split over threads (leader and nonleader) for(Int blk=l+1; blk Accumulate the update from (l-1)th level: // LU(U_col)(U_row) -= L(U_col)(l-1) * U(l-1)(U_row) t_add_extend(thread, kid, lvl, l-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_FALSE); if(kid%((Int)pow(2, l)) == 0) @@ -248,7 +248,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier(thread, kid, my_leader, - b_size, 3, LU[U_col][U_row].scol, 0); + b_size, 3, LU(U_col)(U_row).scol, 0); for(Int ti = 0; ti < num_threads; ti++) { if (thread_array(kid).error_type != BASKER_SUCCESS) { info = BASKER_ERROR; @@ -287,7 +287,7 @@ namespace BaskerNS printf( " kid=%d: calling t_add_extend(k=%d/%d)\n",kid,k,ncol ); fflush(stdout); #endif t_add_extend(thread, kid,lvl,lvl-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_TRUE); } #ifdef BASKER_TIMER @@ -515,7 +515,7 @@ namespace BaskerNS Int U_row = L_col-my_row_leader; Int X_row = l+1; //this will change for us - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); #ifdef BASKER_DEBUG_NFACTOR_COL2 if(L_row >= LL_size(L_col)) { @@ -609,10 +609,10 @@ namespace BaskerNS Int endblk = (lower)?(LL_size(my_idx)):(l+2); for(Int blk = l+1; blk < endblk; ++blk) { - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; - Int p_sizeL = LL[leader_idx][blk].p_size; - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; + Int p_sizeL = LL(leader_idx)(blk).p_size; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; Int *color = &(ws[0]); //printf( " + t_dense_blk_col_copy_atomic2(kid=%d: LL(%d)(%d) += LL(%d)(%d)\n",kid,leader_idx, blk,my_idx,blk); @@ -629,7 +629,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { color[jj] = 0; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -677,7 +677,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; } else { @@ -685,7 +685,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL[leader_idx][blk].p_size = p_sizeL; + LL(leader_idx)(blk).p_size = p_sizeL; //p_size = 0; //not needed }//over all blks } @@ -735,12 +735,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("copy, kid: %d bl: %d A: %d %d \n", @@ -749,7 +749,7 @@ namespace BaskerNS // X += B(:, k) BASKER_MATRIX &B = *Bp; - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; //printf( " -- t_dense_copy_update_matrix2(kid=%d: LL(%d)(%d) += B)\n",kid,leader_idx,bl ); //printf("ADDING UPDATES TO B\n"); //B.info(); @@ -800,9 +800,9 @@ namespace BaskerNS //For recounting patterns in dense blk //Need better sparse update - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; - const Int nrow = LL[leader_idx][bl].nrow; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; + const Int nrow = LL(leader_idx)(bl).nrow; Int *color = &(ws(0)); #ifdef BASKER_DEBUG_NFACTOR_COL2 printf("moving, kid: %d A: %d %d %d %d p_size: %d \n", @@ -886,7 +886,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 02fde7c7ccad..c6ddadf55092 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -101,7 +101,7 @@ namespace BaskerNS //for(Int k = 0; k < 1; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -148,7 +148,7 @@ namespace BaskerNS //barrier k = 0 usedl1 t_basker_barrier_inc_lvl(thread,kid,my_leader, - b_size, 0, LU[U_col][U_row].scol, 0); + b_size, 0, LU(U_col)(U_row).scol, 0); //printf("1 kid: %d error_leader: %d lvl: %d \n", kid, error_leader, lvl); BASKER_BOOL error_flag = BASKER_FALSE; basker_barrier.ExitGet(error_leader, error_flag); @@ -172,7 +172,7 @@ namespace BaskerNS { //for(Int k = 2; k < 3; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -181,7 +181,7 @@ namespace BaskerNS #endif t_add_extend_inc_lvl(thread, kid,lvl,l-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_FALSE); //where to start again @@ -234,7 +234,7 @@ namespace BaskerNS // printf("[3] barrier test, kid: %d leader: %d b_size: %d lvl: %d \n", // kid, my_leader, b_size, lvl); t_basker_barrier_inc_lvl(thread, kid, my_leader, - b_size, 7, LU[U_col][U_row].scol, 0); + b_size, 7, LU(U_col)(U_row).scol, 0); #ifdef BASKER_DEBUG_NFACTOR_COL_INC if(kid == 0) @@ -248,7 +248,7 @@ namespace BaskerNS //if(lvl < 2) { //for(Int k=0; k < 1; ++k) - for(Int k = 0; k < LU[U_col][U_row].ncol; ++k) + for(Int k = 0; k < LU(U_col)(U_row).ncol; ++k) { #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -259,7 +259,7 @@ namespace BaskerNS //printf("test: %d \n", LU(U_col)(U_row).scol); t_add_extend_inc_lvl(thread, kid,lvl,lvl-1, k, - LU[U_col][U_row].scol, + LU(U_col)(U_row).scol, BASKER_TRUE); Entry pivot = 0; if((kid%(Int)(pow(2,lvl))) == 0) @@ -654,13 +654,13 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; if(l == 0) { - Bp = &(AVM[U_col][U_row]); + Bp = &(AVM(U_col)(U_row)); } else { @@ -674,9 +674,9 @@ namespace BaskerNS // } //B.print(); - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -1121,7 +1121,7 @@ namespace BaskerNS //Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1272,7 +1272,7 @@ namespace BaskerNS Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //Need to give them the output pattern @@ -1453,7 +1453,7 @@ namespace BaskerNS Int col_idx_offset = 0; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); //const Int bcol = U.scol; #ifdef BASKER_DEBUG_NFACTOR_COL2 @@ -1564,12 +1564,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1580,10 +1580,10 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; Int *color = &(ws(0)); - LL[leader_idx][bl].p_size = 0; + LL(leader_idx)(bl).p_size = 0; //Get the columns pattern Int U_pattern_col = A_col; @@ -1606,7 +1606,7 @@ namespace BaskerNS //Copy into C - BASKER_MATRIX &Up = LU[U_pattern_col][U_pattern_row]; + BASKER_MATRIX &Up = LU(U_pattern_col)(U_pattern_row); for(Int i = Up.col_ptr(k); i < Up.col_ptr(k+1); i++) { const Int j = Up.row_idx(i); @@ -1620,7 +1620,7 @@ namespace BaskerNS //if there is a L if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &Lp = LL[L_pattern_col][L_pattern_row]; + BASKER_MATRIX &Lp = LL(L_pattern_col)(L_pattern_row); for(Int i = Lp.col_ptr(k)+1; i < Lp.col_ptr(k+1);i++) { const Int j = Lp.row_idx(i); @@ -1708,12 +1708,12 @@ namespace BaskerNS //printf("upper picked, kid: %d \n", kid); //printf("up: %d %d kid: %d \n", // A_col, A_row, kid); - Bp = &(AVM[A_col][A_row]); + Bp = &(AVM(A_col)(A_row)); } else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; //printf("ADDING UPDATES TO B\n"); @@ -1724,8 +1724,8 @@ namespace BaskerNS //return; //Int team_leader = find_leader(kid, l); //Not used - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; //const Int brow = LL(leader_idx)(bl).srow; //const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; @@ -1789,11 +1789,11 @@ namespace BaskerNS //Int CM_idx = kid; - ENTRY_1DARRAY X = LL[leader_idx][bl].ews; - INT_1DARRAY ws = LL[leader_idx][bl].iws; - const Int ws_size = LL[leader_idx][bl].ews_size; + ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; + INT_1DARRAY ws = LL(leader_idx)(bl).iws; + const Int ws_size = LL(leader_idx)(bl).ews_size; // const Int brow = LL(leader_idx)(bl).srow; //NU //NDE - warning: unused - const Int nrow = LL[leader_idx][bl].nrow; + const Int nrow = LL(leader_idx)(bl).nrow; //Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk @@ -1902,8 +1902,8 @@ namespace BaskerNS #endif //end get needed variables - BASKER_MATRIX &L = LL[L_col][L_row]; - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &L = LL(L_col)(L_row); + BASKER_MATRIX &U = LU(U_col)(U_row); BASKER_MATRIX &B = thread_array(kid).C; @@ -1926,9 +1926,9 @@ namespace BaskerNS } */ - INT_1DARRAY ws = LL[X_col][l+1].iws; - const Int ws_size = LL[X_col][l+1].iws_size; - ENTRY_1DARRAY X = LL[X_col][l+1].ews; + INT_1DARRAY ws = LL(X_col)(l+1).iws; + const Int ws_size = LL(X_col)(l+1).iws_size; + ENTRY_1DARRAY X = LL(X_col)(l+1).ews; const Int brow = U.srow; //const Int bcol = U.scol; @@ -2471,11 +2471,11 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; + INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; //const Int brow = U.srow; //const Int bcol = U.scol; @@ -2592,11 +2592,11 @@ namespace BaskerNS //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU[U_col][U_row]; + BASKER_MATRIX &U = LU(U_col)(U_row); - INT_1DARRAY ws = LL[X_col][X_row].iws; + INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; if(kid == leader_id) { @@ -2636,15 +2636,15 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused // Int p_sizeL = LL(leader_idx)(blk).p_size; //NDE - warning: unused // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; // const Int ws_size = LL(my_idx)(blk).iws_size; //NDE - warning: unused //Int p_size = LL(my_idx)(blk).p_size; - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; Int *color = &(ws[0]); // Int *pattern = &(color[ws_size]); //NDE - warning: unused // Int *stack = &(pattern[ws_size]); //NDE - warning: unused @@ -2716,7 +2716,7 @@ namespace BaskerNS if(U_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &UP = LU[U_pattern_col][U_pattern_row]; + BASKER_MATRIX &UP = LU(U_pattern_col)(U_pattern_row); for(Int jj = UP.col_ptr(k); jj < UP.col_ptr(k+1); @@ -2730,7 +2730,7 @@ namespace BaskerNS }//if UPattern if(L_pattern_row != BASKER_MAX_IDX) { - BASKER_MATRIX &LP = LL[L_pattern_col][L_pattern_row]; + BASKER_MATRIX &LP = LL(L_pattern_col)(L_pattern_row); for(Int jj = LP.col_ptr(k); jj < LP.col_ptr(k+1); jj++) @@ -2807,13 +2807,13 @@ namespace BaskerNS { //const Int blk = l+1; - ENTRY_1DARRAY &XL = LL[leader_idx][blk].ews; + ENTRY_1DARRAY &XL = LL(leader_idx)(blk).ews; // INT_1DARRAY &wsL = LL(leader_idx)(blk).iws; //NDE - warning: unused - Int p_sizeL = LL[leader_idx][blk].p_size; + Int p_sizeL = LL(leader_idx)(blk).p_size; // Int ws_sizeL = LL(leader_idx)(blk).iws_size; //NDE - warning: unused - ENTRY_1DARRAY &X = LL[my_idx][blk].ews; - INT_1DARRAY &ws = LL[my_idx][blk].iws; - const Int ws_size = LL[my_idx][blk].iws_size; + ENTRY_1DARRAY &X = LL(my_idx)(blk).ews; + INT_1DARRAY &ws = LL(my_idx)(blk).iws; + const Int ws_size = LL(my_idx)(blk).iws_size; //Int p_size = LL(my_idx)(blk).p_size; Int *color = &(ws[0]); Int *pattern = &(color[ws_size]); @@ -2845,7 +2845,7 @@ namespace BaskerNS #endif //over all nnnz found - for(Int jj = 0; jj < LL[my_idx][blk].nrow; ++jj) + for(Int jj = 0; jj < LL(my_idx)(blk).nrow; ++jj) { color[jj] = 0; @@ -2910,7 +2910,7 @@ namespace BaskerNS //This can be removed in the future if(kid != team_leader) { - LL[my_idx][blk].p_size = 0; + LL(my_idx)(blk).p_size = 0; } else { @@ -2918,7 +2918,7 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - LL[leader_idx][blk].p_size = p_sizeL; + LL(leader_idx)(blk).p_size = p_sizeL; //p_size = 0; NOT USED }//over all blks } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp index b87a0f48eadf..ccbd5a33b827 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_diag.hpp @@ -258,8 +258,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); Int k = btf_tabs(c); Int bcol = M.scol; @@ -336,8 +336,8 @@ namespace BaskerNS Int btab = btf_tabs_offset; BASKER_MATRIX &M = (c >= btab ? BTF_C : BTF_D); - BASKER_MATRIX &U = (c >= btab ? UBTF[c-btab] : U_D[c]); - BASKER_MATRIX &L = (c >= btab ? LBTF[c-btab] : L_D[c]); + BASKER_MATRIX &U = (c >= btab ? UBTF(c-btab) : U_D(c)); + BASKER_MATRIX &L = (c >= btab ? LBTF(c-btab) : L_D(c)); Int bcol = M.scol; //JDB: brow hack: fix. diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index ef9bdb8084ef..c955ff952551 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -349,35 +349,35 @@ int Basker::sfactor() #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[p]; - e_tree (ALM[blk][0], stree_p, 1); + e_tree (ALM(blk)(0), stree_p, 1); #else - e_tree (ALM[blk][0], stree, 1); + e_tree (ALM(blk)(0), stree, 1); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - post_order(ALM[blk][0], stree_p); + post_order(ALM(blk)(0), stree_p); #else - post_order(ALM[blk][0], stree); + post_order(ALM(blk)(0), stree); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - col_count (ALM[blk][0], stree_p); + col_count (ALM(blk)(0), stree_p); #else - col_count (ALM[blk][0], stree); + col_count (ALM(blk)(0), stree); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif //Assign nnz here - //leaf_assign_nnz(LL[blk][0], stree, 0); - //leaf_assign_nnz(LU[blk][LU_size[blk]-1], stree, 0); + //leaf_assign_nnz(LL(blk)(0), stree, 0); + //leaf_assign_nnz(LU(blk)(LU_size[blk]-1), stree, 0); if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); @@ -387,11 +387,11 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - leaf_assign_nnz(LL[blk][0], stree_p, 0); - leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree_p, 0); + leaf_assign_nnz(LL(blk)(0), stree_p, 0); + leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree_p, 0); #else - leaf_assign_nnz(LL[blk][0], stree, 0); - leaf_assign_nnz(LU[blk][LU_size(blk)-1], stree, 0); + leaf_assign_nnz(LL(blk)(0), stree, 0); + leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); #endif #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); @@ -441,10 +441,10 @@ int Basker::sfactor() timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM[U_col][U_row], stree_p, + U_blk_sfactor(AVM(U_col)(U_row), stree_p, gScol(l), gSrow(glvl), off_diag); #else - U_blk_sfactor(AVM[U_col][U_row], stree, + U_blk_sfactor(AVM(U_col)(U_row), stree, gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER @@ -460,8 +460,8 @@ int Basker::sfactor() // stree, gScol, gSrow); //Assign nnz counts for leaf off-diag - //U_assign_nnz(LU[U_col][U_row], stree, 0); - //L_assign_nnz(LL[blk][l+1], stree, 0); + //U_assign_nnz(LU(U_col)(U_row), stree, 0); + //L_assign_nnz(LL(blk)(l+1), stree, 0); #ifdef BASKER_TIMER timer1.reset(); #endif @@ -472,11 +472,11 @@ int Basker::sfactor() printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); - L_assign_nnz(LL[blk][l+1], stree_p, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); + L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); #else - U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); - L_assign_nnz(LL[blk][l+1], stree, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); + L_assign_nnz(LL(blk)(l+1), stree, fill_factor, 0); #endif #ifdef BASKER_TIMER time2 += timer1.seconds(); @@ -540,43 +540,43 @@ int Basker::sfactor() //gScol(lvl), gSrow(pp)); #ifdef BASKER_TIMER - printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM[U_col][U_row].nrow,ALM[U_col][U_row].ncol,ALM[U_col][U_row].nnz ); fflush(stdout); + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); fflush(stdout); #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; - S_blk_sfactor(ALM[U_col][U_row], stree_p, + S_blk_sfactor(ALM(U_col)(U_row), stree_p, gScol(lvl), gSrow(pp)); #else - S_blk_sfactor(ALM[U_col][U_row], stree, + S_blk_sfactor(ALM(U_col)(U_row), stree, gScol(lvl), gSrow(pp)); #endif #ifdef BASKER_TIMER - printf( " >>> -> nnz = %d\n",ALM[U_col][U_row].nnz ); fflush(stdout); + printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); fflush(stdout); #endif - //S_assign_nnz(LL[U_col][U_row], stree, 0); + //S_assign_nnz(LL(U_col)(U_row), stree, 0); if(Options.verbose == BASKER_TRUE) { printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LL[U_col][U_row], stree_p, 0); + S_assign_nnz(LL(U_col)(U_row), stree_p, 0); #else - S_assign_nnz(LL[U_col][U_row], stree, 0); + S_assign_nnz(LL(U_col)(U_row), stree, 0); #endif - //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); + //S_assign_nnz(LU(U_col)(LU_size[U_col]-1), stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree_p, 0); + S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); #else - S_assign_nnz(LU[U_col][LU_size(U_col)-1], stree, 0); + S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); #endif #ifdef BASKER_TIMER - printf( " >>> -> nnz = %d\n",LU[U_col][LU_size(U_col)-1].nnz); fflush(stdout); + printf( " >>> -> nnz = %d\n",LU(U_col)(LU_size(U_col)-1).nnz); fflush(stdout); #endif } #ifdef SHYLU_BASKER_STREE_LIST @@ -614,10 +614,10 @@ int Basker::sfactor() Int off_diag = 1; #ifdef SHYLU_BASKER_STREE_LIST - U_blk_sfactor(AVM[U_col][U_row], stree_p, + U_blk_sfactor(AVM(U_col)(U_row), stree_p, gScol(l), gSrow(pp), off_diag); #else - U_blk_sfactor(AVM[U_col][U_row], stree, + U_blk_sfactor(AVM(U_col)(U_row), stree, gScol(l), gSrow(pp), off_diag); #endif @@ -638,11 +638,11 @@ int Basker::sfactor() fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST - U_assign_nnz(LU[U_col][U_row], stree_p, fill_factor, 0); - L_assign_nnz(LL[inner_blk][l-lvl], stree_p, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); + L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); #else - U_assign_nnz(LU[U_col][U_row], stree, fill_factor, 0); - L_assign_nnz(LL[inner_blk][l-lvl], stree, fill_factor, 0); + U_assign_nnz(LU(U_col)(U_row), stree, fill_factor, 0); + L_assign_nnz(LL(inner_blk)(l-lvl), stree, fill_factor, 0); #endif //printf("Here 1 \n"); } @@ -2491,7 +2491,7 @@ int Basker::sfactor() #ifdef BASKER_TIMER printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - L_D[i].init_matrix("LBFT", + L_D(i).init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2499,12 +2499,12 @@ int Basker::sfactor() nnz); //For pruning - L_D[i].init_pend(); + L_D(i).init_pend(); #ifdef BASKER_TIMER printf( " U_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - U_D[i].init_matrix("UBFT", + U_D(i).init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2546,7 +2546,7 @@ int Basker::sfactor() #ifdef BASKER_TIMER printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - LBTF[i-btf_tabs_offset].init_matrix("LBFT", + LBTF(i-btf_tabs_offset).init_matrix("LBFT", btf_tabs(i), lblk_size, btf_tabs(i), @@ -2555,12 +2555,12 @@ int Basker::sfactor() //For pruning //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); - LBTF[i-btf_tabs_offset].init_pend(); + LBTF(i-btf_tabs_offset).init_pend(); #ifdef BASKER_TIMER printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); #endif - UBTF[i-btf_tabs_offset].init_matrix("UBFT", + UBTF(i-btf_tabs_offset).init_matrix("UBFT", btf_tabs(i), lblk_size, btf_tabs(i), diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 890bc8a17fca..622bdf39a0fd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -100,9 +100,9 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM[blk][0], - LL[blk][0], - LU[blk][LU_size(blk)-1]); + sfactor_nd_dom_estimate(ALM(blk)(0), + LL(blk)(0), + LU(blk)(LU_size(blk)-1)); for(Int l=0; l < tree.nlvls; l++) { @@ -124,11 +124,11 @@ namespace BaskerNS //JDB TEST PASSED U_row = my_new_row; - sfactor_nd_upper_estimate(AVM[U_col][U_row], - LU[U_col][U_row]); + sfactor_nd_upper_estimate(AVM(U_col)(U_row), + LU(U_col)(U_row)); - sfactor_nd_lower_estimate(ALM[blk][l+1], - LL[blk][l+1]); + sfactor_nd_lower_estimate(ALM(blk)(l+1), + LL(blk)(l+1)); } // end for l @@ -141,9 +141,9 @@ namespace BaskerNS Int U_col = S(lvl+1)(ppp); Int U_row = 0; - sfactor_nd_sep_estimate(ALM[U_col][U_row], - LL[U_col][U_row], - LU[U_col][LU_size(U_col)-1]); + sfactor_nd_sep_estimate(ALM(U_col)(U_row), + LL(U_col)(U_row), + LU(U_col)(LU_size(U_col)-1)); Int innerblk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) @@ -167,12 +167,12 @@ namespace BaskerNS //JDB TEST PASS U_row = my_new_row; - sfactor_nd_sep_upper_estimate(AVM[U_col][U_row], - LU[U_col][U_row]); + sfactor_nd_sep_upper_estimate(AVM(U_col)(U_row), + LU(U_col)(U_row)); sfactor_nd_sep_lower_estimate( - ALM[innerblk][l-lvl], - LL[innerblk][l-lvl]); + ALM(innerblk)(l-lvl), + LL(innerblk)(l-lvl)); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp index b2fa1204cd86..b01d3ec72632 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs.hpp @@ -293,7 +293,7 @@ namespace BaskerNS for(Int b = nblks_c-1; b>= 0; b--) { //---Lower solve - BASKER_MATRIX &LC = LBTF[b]; + BASKER_MATRIX &LC = LBTF(b); #ifdef BASKER_DEBUG_SOLVE_RHS printf("\n\n btf b=%ld (%d x %d), LBTF(%d)\n", (long)b, (int)LC.nrow, (int)LC.ncol, (int)b); #endif @@ -303,7 +303,7 @@ namespace BaskerNS //printVec(y,gn); - BASKER_MATRIX &UC = UBTF[b]; + BASKER_MATRIX &UC = UBTF(b); //U(C)\x -> y upper_tri_solve(UC,x,y); @@ -420,7 +420,7 @@ namespace BaskerNS for(Int b = btf_top_tabs_offset-1; b>= 0; b--) { //L(C)\x -> y - BASKER_MATRIX &LC = L_D[b]; + BASKER_MATRIX &LC = L_D(b); lower_tri_solve(LC, x, y); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after L solve (b=%d)\n",b ); fflush(stdout); @@ -429,7 +429,7 @@ namespace BaskerNS #endif //U(C)\y -> x - BASKER_MATRIX &UC = U_D[b]; + BASKER_MATRIX &UC = U_D(b); upper_tri_solve(UC, y, x); #ifdef BASKER_DEBUG_SOLVE_RHS printf( "\n after U solve\n" ); fflush(stdout); @@ -476,7 +476,7 @@ namespace BaskerNS //Forward solve on A for(Int b = 0; b < tree.nblks; ++b) { - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); //L\x -> y lower_tri_solve(L, x, y, scol_top); @@ -500,7 +500,7 @@ namespace BaskerNS //Update offdiag for(Int bb = 1; bb < LL_size(b); ++bb) { - BASKER_MATRIX &LD = LL[b][bb]; + BASKER_MATRIX &LD = LL(b)(bb); //x = LD*y; #ifdef BASKER_DEBUG_SOLVE_RHS char filename[200]; @@ -549,7 +549,7 @@ namespace BaskerNS #endif //U\y -> x - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); upper_tri_solve(U, y, x, scol_top); // NDE: y , x positions swapped... // seems role of x and y changed... #ifdef BASKER_DEBUG_SOLVE_RHS @@ -568,7 +568,7 @@ namespace BaskerNS #endif //y = UB*x; - BASKER_MATRIX &UB = LU[b][bb]; + BASKER_MATRIX &UB = LU(b)(bb); neg_spmv(UB, x, y, scol_top); #ifdef BASKER_DEBUG_SOLVE_RHS diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp index bfd6e2460062..f950e9bd6132 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_solve_rhs_tr.hpp @@ -346,10 +346,10 @@ namespace BaskerNS // Update off-diag in the block-row before the diag solve for(int bb = LL_size(b)-1; bb > 0; bb--) { - BASKER_MATRIX &LD = LL[b][bb]; + BASKER_MATRIX &LD = LL(b)(bb); neg_spmv_perm_tr(LD, x, y, scol_top); // update y as mod. rhs, x as solution } - BASKER_MATRIX &L = LL[b][0]; + BASKER_MATRIX &L = LL(b)(0); if (L.nrow != 0 && L.ncol != 0) // Avoid degenerate case e.g. empty block following nd-partitioning lower_tri_solve_tr(L, y, x, scol_top); // x and y should be equal after in M range... } @@ -373,10 +373,10 @@ namespace BaskerNS for(Int bb = 0; bb < LU_size(b)-1; bb++) { // update offdiag corresponding to the block-row - BASKER_MATRIX &UB = LU[b][bb]; + BASKER_MATRIX &UB = LU(b)(bb); neg_spmv_tr(UB, x, y, scol_top); } - BASKER_MATRIX &U = LU[b][LU_size(b)-1]; + BASKER_MATRIX &U = LU(b)(LU_size(b)-1); if (U.nrow != 0 && U.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(U, x, y, scol_top); } @@ -410,7 +410,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of { for(Int b = 0; b < btf_top_tabs_offset; b++) { - BASKER_MATRIX &UC = U_D[b]; + BASKER_MATRIX &UC = U_D(b); if ( b > 0 ) spmv_BTF_tr(b, BTF_D, x, y, false); @@ -418,7 +418,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC, x, y); - BASKER_MATRIX &LC = L_D[b]; + BASKER_MATRIX &LC = L_D(b); if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC, x, y); @@ -462,7 +462,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (nblks_c > 0) { Int offset = 0; for(Int b = 0; b < nblks_c; b++) { - BASKER_MATRIX &UC = UBTF[b]; + BASKER_MATRIX &UC = UBTF(b); // Update off-diag // Update X with Y @@ -472,7 +472,7 @@ if (Options.verbose) std::cout << "BTF_D^T begin: from 0 to " << btf_top_tabs_of if (UC.nrow != 0 && UC.ncol != 0) // Avoid degenerate case upper_tri_solve_tr(UC,x,y); - BASKER_MATRIX &LC = LBTF[b]; + BASKER_MATRIX &LC = LBTF(b); if (LC.nrow != 0 && LC.ncol != 0) // Avoid degenerate case lower_tri_solve_tr(LC,x,y); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp index 995bad188542..c7f804794f67 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp @@ -148,8 +148,8 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - MATRIX &myL = LL[l][0]; - stats.Lnnz += LL[l][0].nnz; + MATRIX &myL = LL(l)(0); + stats.Lnnz += LL(l)(0).nnz; }//over all Ls return stats.Lnnz; @@ -166,10 +166,10 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - for(Int r=0; r 0 ? U_view_count(i) : 1); if (U_view_size > 0) { - MALLOC_MATRIX_1DARRAY(AVM[i], U_view_size); - MALLOC_MATRIX_1DARRAY(LU[i], U_view_size); + MALLOC_MATRIX_1DARRAY(AVM(i), U_view_size); + MALLOC_MATRIX_1DARRAY(LU(i), U_view_size); } //Malloc AL subarray // NOTE: size at least one to allow empty block Int L_view_size = (L_view_count(i) > 0 ? L_view_count(i): 1); if (L_view_size > 0) { - MALLOC_MATRIX_1DARRAY(ALM[i], L_view_size); - MALLOC_MATRIX_1DARRAY(LL[i], L_view_size); + MALLOC_MATRIX_1DARRAY(ALM(i), L_view_size); + MALLOC_MATRIX_1DARRAY(LL(i), L_view_size); } LU_size(i) = U_view_count(i); @@ -855,11 +855,11 @@ namespace BaskerNS #endif for(Int j=i; j != -flat.ncol; j=tree.treetab[j]) { - MATRIX_1DARRAY &UMtemp = AVM[j]; - MATRIX_1DARRAY &LMtemp = ALM[i]; + MATRIX_1DARRAY &UMtemp = AVM(j); + MATRIX_1DARRAY &LMtemp = ALM(i); - MATRIX_1DARRAY &LUtemp = LU[j]; - MATRIX_1DARRAY &LLtemp = LL[i]; + MATRIX_1DARRAY &LUtemp = LU(j); + MATRIX_1DARRAY &LLtemp = LL(i); #ifdef MY_DEBUG printf( " AVM(%d)(%d).set_shape(%dx%d)\n",j,U_view_count[j], tree.col_tabs[i+1]-tree.col_tabs[i],tree.col_tabs[j+1]-tree.col_tabs[j] ); @@ -1056,7 +1056,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((L_row+1 < LL_size(L_col)) && - (tree.row_tabs(r_idx+1) == ALM[L_col][L_row+1].srow)) + (tree.row_tabs(r_idx+1) == ALM(L_col)(L_row+1).srow)) { //printf( " > ALM(%d)(%d).srow = %d, row_tab(%d) = %d\n",L_col,L_row+1,ALM(L_col)(L_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); L_row++; @@ -1071,7 +1071,7 @@ namespace BaskerNS (r_idx < tree.nblks && tree.row_tabs(r_idx+1) == tree.row_tabs(r_idx))) // skip empty blocks { if((U_row+1 < LU_size(U_col)) && - (tree.row_tabs(r_idx+1) == AVM[U_col][U_row+1].srow)) + (tree.row_tabs(r_idx+1) == AVM(U_col)(U_row+1).srow)) { //printf( " + AVM(%d)(%d).srow = %d, row_tab(%d) = %d\n",U_col,U_row+1,AVM(U_col)(U_row+1).srow, r_idx+1,tree.row_tabs(r_idx+1) ); U_row++; @@ -1095,8 +1095,8 @@ namespace BaskerNS //Get Matrix Ref - BASKER_MATRIX &Ltemp = ALM[L_col][L_row]; - BASKER_MATRIX &Utemp = AVM[U_col][U_row]; + BASKER_MATRIX &Ltemp = ALM(L_col)(L_row); + BASKER_MATRIX &Utemp = AVM(U_col)(U_row); Int bcol = Ltemp.scol; //diag blk @@ -1162,11 +1162,11 @@ namespace BaskerNS for(Int sb = 0; sb < LL_size(b); ++sb) { //printf( " ALM(%d)(%d).clean_col()\n",b,sb ); - ALM[b][sb].clean_col(); + ALM(b)(sb).clean_col(); } for(Int sb = 0; sb < LU_size(b); ++sb) { - AVM[b][sb].clean_col(); + AVM(b)(sb).clean_col(); } }//for - over all blks diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 715ac1c13f5f..2d8322c05de2 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -327,7 +327,7 @@ namespace BaskerNS { for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = LBTF[b-btf_tabs_offset]; + BASKER_MATRIX &L = LBTF(b-btf_tabs_offset); L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -342,7 +342,7 @@ namespace BaskerNS #if defined(BASKER_SPLIT_A) for(Int b=chunk_start; b < chunk_end; b++) { - BASKER_MATRIX &L = L_D[b]; + BASKER_MATRIX &L = L_D(b); L.clear_pend(); L.nnz = L.mnnz; }//end-for over chunck @@ -364,11 +364,11 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif - LL[b][row].clear_pend(); - LL[b][row].nnz = LL[b][row].mnnz; + LL(b)(row).clear_pend(); + LL(b)(row).nnz = LL(b)(row).mnnz; }//end over all row }//end select which thread @@ -383,13 +383,13 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif //LU(b)(LU_size(b)-1).nnz = 0; - for(Int kk = 0; kk < LU[b][LU_size(b)-1].ncol+1; kk++) + for(Int kk = 0; kk < LU(b)(LU_size(b)-1).ncol+1; kk++) { - LU[b][LU_size(b)-1].col_ptr(kk) = 0; + LU(b)(LU_size(b)-1).col_ptr(kk) = 0; } /* @@ -399,7 +399,7 @@ namespace BaskerNS LU(b)(LU_size(b)-1).mnnz); */ - LU[b][LU_size(b)-1].nnz = LU[b][LU_size(b)-1].mnnz; + LU(b)(LU_size(b)-1).nnz = LU(b)(LU_size(b)-1).mnnz; for(Int l = lvl+1; l < tree.nlvls+1; l++) { Int U_col = S(l)(kid); @@ -416,12 +416,12 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif - for(Int kk = 0; kk < LU[U_col][U_row].ncol+1; kk++) + for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) { - LU[U_col][U_row].col_ptr(kk) = 0; + LU(U_col)(U_row).col_ptr(kk) = 0; } /* printf("flipU (%d,%d) %d %d \n", @@ -430,7 +430,7 @@ namespace BaskerNS LU(U_col)(U_row).mnnz); */ - LU[U_col][U_row].nnz = LU[U_col][U_row].mnnz; + LU(U_col)(U_row).nnz = LU(U_col)(U_row).mnnz; //LU(U_col)(U_row).nnz = 0; }//over inner lvls @@ -466,19 +466,19 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif #ifdef BASKER_TIMER timer_init_matrixL.reset(); - printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL[b][row].nnz, (int)LL[b][row].mnnz); fflush(stdout); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); fflush(stdout); #endif - LL[b][row].init_matrix("Loffdig", - LL[b][row].srow, - LL[b][row].nrow, - LL[b][row].scol, - LL[b][row].ncol, - LL[b][row].nnz); + LL(b)(row).init_matrix("Loffdig", + LL(b)(row).srow, + LL(b)(row).nrow, + LL(b)(row).scol, + LL(b)(row).ncol, + LL(b)(row).nnz); #ifdef BASKER_TIMER printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); @@ -487,20 +487,20 @@ namespace BaskerNS //Fix when this all happens in the future if(Options.incomplete == BASKER_TRUE) { - LL[b][row].init_inc_lvl(); + LL(b)(row).init_inc_lvl(); } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); - printf( " ++ zero out (%d) ++\n",int(LL[b][row].col_ptr.extent(0)) ); fflush(stdout); + printf( " ++ zero out (%d) ++\n",int(LL(b)(row).col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - LL[b][row].init_ptr(); + LL(b)(row).init_ptr(); //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER - printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL[b][row].ncol ); fflush(stdout); + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - LL[b][row].init_pend(); + LL(b)(row).init_pend(); #ifdef BASKER_TIMER printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); #endif @@ -529,23 +529,23 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif #ifdef BASKER_TIMER printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, - (int)LU[b][LU_size(b)-1].nrow,(int)LU[b][LU_size(b)-1].ncol,(int)LU[b][LU_size(b)-1].nnz, (int)LU[b][LU_size(b)-1].mnnz, - (int)LU[b][LU_size(b)-1].srow,(int)LU[b][LU_size(b)-1].scol); + (int)LU(b)(LU_size(b)-1).nrow,(int)LU(b)(LU_size(b)-1).ncol,(int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz, + (int)LU(b)(LU_size(b)-1).srow,(int)LU(b)(LU_size(b)-1).scol); #endif - LU[b][LU_size(b)-1].init_matrix("Udiag", - LU[b][LU_size(b)-1].srow, - LU[b][LU_size(b)-1].nrow, - LU[b][LU_size(b)-1].scol, - LU[b][LU_size(b)-1].ncol, - LU[b][LU_size(b)-1].nnz); + LU(b)(LU_size(b)-1).init_matrix("Udiag", + LU(b)(LU_size(b)-1).srow, + LU(b)(LU_size(b)-1).nrow, + LU(b)(LU_size(b)-1).scol, + LU(b)(LU_size(b)-1).ncol, + LU(b)(LU_size(b)-1).nnz); //LU(b)(LU_size(b)-1).fill(); - LU[b][LU_size(b)-1].init_ptr(); + LU(b)(LU_size(b)-1).init_ptr(); //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) @@ -583,29 +583,29 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif #ifdef BASKER_TIMER printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, - (int)LU[U_col][U_row].nrow,(int)LU[U_col][U_row].ncol, - (int)LU[U_col][U_row].nnz, (int)LU[U_col][U_row].mnnz, - (int)LU[U_col][U_row].srow,(int)LU[U_col][U_row].scol); + (int)LU(U_col)(U_row).nrow,(int)LU(U_col)(U_row).ncol, + (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz, + (int)LU(U_col)(U_row).srow,(int)LU(U_col)(U_row).scol); #endif - LU[U_col][U_row].init_matrix("Uoffdiag", - LU[U_col][U_row].srow, - LU[U_col][U_row].nrow, - LU[U_col][U_row].scol, - LU[U_col][U_row].ncol, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).init_matrix("Uoffdiag", + LU(U_col)(U_row).srow, + LU(U_col)(U_row).nrow, + LU(U_col)(U_row).scol, + LU(U_col)(U_row).ncol, + LU(U_col)(U_row).nnz); //LU(U_col)(U_row).fill(); - LU[U_col][U_row].init_ptr(); + LU(U_col)(U_row).init_ptr(); //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { - LU[U_col][U_row].init_inc_lvl(); + LU(U_col)(U_row).init_inc_lvl(); } }//over inner lvls @@ -646,9 +646,9 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("ALM Factor Init: %d %d , kid: %d, nnz: %d nrow: %d ncol: %d \n", - b, row, kid, ALM[b][row].nnz, - ALM[b][row].nrow, - ALM[b][row].ncol); + b, row, kid, ALM(b)(row).nnz, + ALM(b)(row).nrow, + ALM(b)(row).ncol); #endif /*if (kid == 1) @@ -663,7 +663,7 @@ namespace BaskerNS printf("ALM(%d,%d: %dx%d) alloc with A: kid=%d btf=%d\n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid, Options.btf); #endif - ALM[b][row].convert2D(A, alloc, kid); + ALM(b)(row).convert2D(A, alloc, kid); } else { @@ -672,7 +672,7 @@ namespace BaskerNS printf("ALM(%d,%d, %dx%d) alloc (btf) with BTF_A: kid=%d \n", b, row, ALM(b)(row).nrow, ALM(b)(row).ncol, kid); #endif - ALM[b][row].convert2D(BTF_A, alloc, kid); + ALM(b)(row).convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < ALM(b)(row).ncol; j++) { @@ -697,9 +697,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INTI printf("AUM Factor init: %d %d, kid: %d nnz: %d nrow: %d ncol: %d \n", b, LU_size(b)-1, kid, - AVM[b][LU_size(b)-1].nnz, - AVM[b][LU_size(b)-1].nrow, - AVM[b][LU_size(b)-1].ncol); + AVM(b)(LU_size(b)-1).nnz, + AVM(b)(LU_size(b)-1).nrow, + AVM(b)(LU_size(b)-1).ncol); #endif /*if (kid == 1) { @@ -708,13 +708,13 @@ namespace BaskerNS }*/ if(Options.btf == BASKER_FALSE) { - AVM[b][LU_size(b)-1].convert2D(A, alloc, kid); + AVM(b)(LU_size(b)-1).convert2D(A, alloc, kid); } else { //printf("Using BTF AU\n"); //printf(" > kid=%d: convert2D AVM(%d,%d)\n", kid, b, LU_size(b)-1); - AVM[b][LU_size(b)-1].convert2D(BTF_A, alloc, kid); + AVM(b)(LU_size(b)-1).convert2D(BTF_A, alloc, kid); } /*if (kid == 0) { for(Int j = 0; j < AVM(b)(LU_size(b)-1).ncol; j++) { @@ -771,9 +771,9 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init AUM: %d %d lvl: %d l: %d kid: %d nnz: %d nrow: %d ncol: %d \n", U_col, U_row, lvl, l, kid, - AVM[U_col][U_row].nnz, - AVM[U_col][U_row].nrow, - AVM[U_col][U_row].ncol); + AVM(U_col)(U_row).nnz, + AVM(U_col)(U_row).nrow, + AVM(U_col)(U_row).ncol); #endif #if 0 @@ -793,7 +793,7 @@ namespace BaskerNS //printf("2nd convert AVM: %d %d size:%d kid: %d\n", // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); - AVM[U_col][U_row].convert2D(BTF_A, alloc, kid); + AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); } @@ -828,17 +828,17 @@ namespace BaskerNS for(Int l = 0; l < LL_size(b); l++) { //defining here - LL[b][l].iws_size = LL[b][l].nrow; + LL(b)(l).iws_size = LL(b)(l).nrow; //This can be made smaller, see notes in Sfactor_old - LL[b][l].iws_mult = 5; - LL[b][l].ews_size = LL[b][l].nrow; + LL(b)(l).iws_mult = 5; + LL(b)(l).ews_size = LL(b)(l).nrow; //This can be made smaller, see notes in sfactor_old - LL[b][l].ews_mult = 2; + LL(b)(l).ews_mult = 2; - Int iws_size = LL[b][l].iws_size; - Int iws_mult = LL[b][l].iws_mult; - Int ews_size = LL[b][l].ews_size; - Int ews_mult = LL[b][l].ews_mult; + Int iws_size = LL(b)(l).iws_size; + Int iws_mult = LL(b)(l).iws_mult; + Int ews_size = LL(b)(l).ews_size; + Int ews_mult = LL(b)(l).ews_mult; if(iws_size > max_sep_size) { @@ -851,10 +851,10 @@ namespace BaskerNS } BASKER_ASSERT((iws_size*iws_mult)>0, "util iws"); - MALLOC_INT_1DARRAY(LL[b][l].iws, iws_size*iws_mult); + MALLOC_INT_1DARRAY(LL(b)(l).iws, iws_size*iws_mult); for(Int i=0; i 0) { BASKER_ASSERT((ews_size*ews_mult)>0, "util ews"); - MALLOC_ENTRY_1DARRAY(LL[b][l].ews, ews_size*ews_mult); + MALLOC_ENTRY_1DARRAY(LL(b)(l).ews, ews_size*ews_mult); for(Int i=0; i Date: Fri, 25 Oct 2024 20:42:26 -0600 Subject: [PATCH 5/5] ShyLU - Basker : cleanups Signed-off-by: iyamazaki --- packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp | 1 - packages/shylu/shylu_node/basker/src/shylubasker_types.hpp | 4 ---- 2 files changed, 5 deletions(-) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index 5b6ae49e5e14..784df704eb59 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -1178,7 +1178,6 @@ namespace BaskerNS BASKER_INLINE int Basker::sfactor_copy() { - printf( " .. sfactor_copy ..\n" ); fflush(stdout); //Reorder A; //Match order if(match_flag == BASKER_TRUE) diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 193ecb11e24a..f57447b10906 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -172,7 +172,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - /*a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s);*/ \ Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -182,7 +181,6 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - /*a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1);*/ \ Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ @@ -237,7 +235,6 @@ enum BASKER_INCOMPLETE_CODE BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ - Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -247,7 +244,6 @@ enum BASKER_INCOMPLETE_CODE BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ - Kokkos::resize(a,s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \