diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp index 94f4ba1df086..f9b33e325bd7 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp @@ -1494,13 +1494,6 @@ namespace BaskerNS //end NDE - - //RHS and solutions (These are not used anymore) - ENTRY_2DARRAY rhs; - ENTRY_2DARRAY sol; - Int nrhs; - - BASKER_TREE part_tree; BASKER_TREE tree; BASKER_SYMBOLIC_TREE stree; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp index c1b92347a094..c7b9d66311ab 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp @@ -92,16 +92,8 @@ namespace BaskerNS BASKER_INLINE void Basker::Finalize() { - //finalize all matrices - A.Finalize(); - At.Finalize(); //??? is At even used - BTF_A.Finalize(); - BTF_C.Finalize(); - BTF_B.Finalize(); - BTF_D.Finalize(); - BTF_E.Finalize(); - //finalize array of 2d matrics + // Actual Finalize is called by destructor FREE_MATRIX_2DARRAY(AVM, tree.nblks); FREE_MATRIX_2DARRAY(ALM, tree.nblks); @@ -120,7 +112,6 @@ namespace BaskerNS //Thread Array FREE_THREAD_1DARRAY(thread_array); - basker_barrier.Finalize(); //S (Check on this) FREE_INT_2DARRAY(S, tree.nblks); @@ -187,12 +178,6 @@ namespace BaskerNS FREE_ENTRY_1DARRAY(x_view_ptr_scale); FREE_ENTRY_1DARRAY(y_view_ptr_scale); - - //Structures - part_tree.Finalize(); - tree.Finalize(); - stree.Finalize(); - stats.Finalize(); }//end Finalize() @@ -239,7 +224,7 @@ namespace BaskerNS //Option = 2, BTF BASKER if(option == 1) - { + { default_order(); } else if(option == 2) @@ -475,12 +460,16 @@ namespace BaskerNS //Find BTF ordering if(btf_order2() != BASKER_SUCCESS) { + if(Options.verbose == BASKER_TRUE) + { + printf("Basker Ordering Failed \n"); fflush(stdout); + } return BASKER_ERROR; } 
if(Options.verbose == BASKER_TRUE) { - printf("Basker Ordering Found \n"); + printf("Basker Ordering Found \n"); fflush(stdout); } /*if((Options.btf == BASKER_TRUE) && (btf_tabs_offset != 0)) @@ -512,7 +501,7 @@ namespace BaskerNS if(symb_flag == BASKER_TRUE) { if(Options.verbose == BASKER_TRUE) { - printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); + printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); fflush(stdout); } return BASKER_ERROR; } @@ -547,7 +536,7 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf(" == Basker Symbolic Done ==\n\n"); + printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout); } #ifdef BASKER_TIMER @@ -1573,7 +1562,7 @@ namespace BaskerNS #endif } - // ---------------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------------- // 'sort' rows of BTF_A into ND structure #if 0 for (Int i = 0; i < BTF_A.nnz; ++i) { @@ -1621,6 +1610,7 @@ namespace BaskerNS symmetric_sfactor(); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for symbolic after ND on a big block A: " << nd_symbolic_timer.seconds() << std::endl; + fflush(stdout); } Kokkos::Timer nd_last_dense_timer; @@ -1628,16 +1618,23 @@ namespace BaskerNS btf_last_dense(flag); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for last-dense after ND on a big block A: " << nd_last_dense_timer.seconds() << std::endl; + fflush(stdout); } #ifdef BASKER_KOKKOS // ---------------------------------------------------------------------------------------------- // Allocate & Initialize blocks + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif /*kokkos_sfactor_init_workspace iWS(flag, this); @@ -1950,10 +1947,16 @@ namespace BaskerNS }*/ Kokkos::Timer 
nd_setup2_timer; +#ifdef BASKER_PARALLEL_INIT_WORKSPACE kokkos_sfactor_init_workspace iWS(flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); +#else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(flag, p); + } +#endif if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index a6e1f5c41e91..cd2c9f57bf0a 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -95,7 +95,7 @@ namespace BaskerNS Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; if(LL(blkcol)(0).nnz >= - LU(blkcol)(blkUrow).nnz) + LU(blkcol)(blkUrow).nnz) { resize_U = thread_array(ti).error_info; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp index 02a896d957c0..4bbd86507d9d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp @@ -95,6 +95,9 @@ namespace BaskerNS BASKER_INLINE int fill(); + BASKER_INLINE + void init_ptr(); + BASKER_INLINE void init_inc_lvl(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp index 4f12887c87ed..e40361e6f988 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp @@ -328,7 +328,7 @@ namespace BaskerNS if(nnz == _nnz) { copy_vec(_row_idx, _nnz, row_idx); - copy_vec(_val,_nnz, val); + copy_vec(_val, _nnz, val); } else { @@ -498,6 +498,13 @@ namespace BaskerNS return 
0; } + template + BASKER_INLINE + void BaskerMatrix::init_ptr() + { + for (Int i = 0; i < ncol+1; i ++) col_ptr(i) = 0; + } + template BASKER_INLINE void BaskerMatrix::convert2D diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp index d2c6a5690528..cef593230d5e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp @@ -171,13 +171,9 @@ namespace BaskerNS }//end while if(Options.verbose == BASKER_TRUE) { - printf("Time DOMAIN: %lf \n", timer.seconds()); + printf("Time DOMAIN: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time DOMAIN: %lf \n", timer.seconds()); - timer.reset(); - #endif #else// else basker_kokkos #pragma omp parallel @@ -282,13 +278,9 @@ namespace BaskerNS //printf( " End Sep: info = %d (%d, %d)\n",info,BASKER_SUCCESS,BASKER_ERROR ); if(Options.verbose == BASKER_TRUE) { - printf("Time SEP: %lf \n", timer.seconds()); + printf("Time SEP: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time SEP: %lf \n", timer.seconds()); - timer.reset(); - #endif } // ---------------------------------------------------------------------------------------- // @@ -363,11 +355,8 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf("Time BTF: %lf \n", timer.seconds()); + printf("Time BTF: %lf \n\n", timer.seconds()); } - #ifdef BASKER_TIMER - printf("Time BTF: %lf \n", timer.seconds()); - #endif }//end btf call Kokkos::Timer tzback; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 499e00edd417..2e0434796e33 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -149,7 +149,7 @@ namespace BaskerNS const Mag normA = BTF_A.gnorm; const 
Mag normA_blk = BTF_A.anorm; - Int b = S[0][kid]; //Which blk from schedule + Int b = S(0)(kid); //Which blk from schedule BASKER_MATRIX &L = LL(b)(0); BASKER_MATRIX &U = LU(b)(LU_size(b)-1); BASKER_MATRIX &M = ALM(b)(0); //A->blk @@ -159,9 +159,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL(b)(0).ews; Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif //Int bcol = L.scol; //begining col //NOT UD Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1286,8 +1286,8 @@ namespace BaskerNS INT_1DARRAY ws = LL(wsb)(l).iws; const Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1460,9 +1460,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL(wsb)(l).ews; Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif const Entry zero (0.0); @@ -1607,7 +1607,7 @@ namespace BaskerNS if (blkcol == 2 && blkrow == 1) printf( " L.colptr(%d) = %d\n",k+1,lnnz ); #endif - //LL[X_col][X_row].p_size = 0; + //LL(X_col)(X_row).p_size = 0; LL(X_col)(X_row).p_size = 0; return 0; @@ -1831,7 +1831,7 @@ namespace BaskerNS }//over all nonzero in left #ifdef BASKER_2DL - //LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; LL(X_col)(X_row).p_size = nnz; #endif @@ -2056,7 +2056,7 @@ namespace 
BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - //LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; LL(X_col)(X_row).p_size = nnz; #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 1fb5dc3fcc2b..c9e696f50786 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -1555,7 +1555,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -1717,7 +1717,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -1846,7 +1846,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -1869,7 +1869,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -2046,7 +2046,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -2176,18 +2176,18 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_old(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size[b]-1]; + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size[b]-1); #ifdef BASKER_2DL printf("Accessing blk: 
%d \n", b); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif Int bcol = L.scol; //begining col @@ -2576,15 +2576,15 @@ namespace BaskerNS { //Setup variables - const Int b = S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int brow = L.srow; @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); /* @@ -2756,11 +2756,10 @@ namespace BaskerNS - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; - - Int nnz = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; @@ -2969,7 +2968,7 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = 
LL(X_col)(X_row).iws; @@ -3315,7 +3314,7 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 650bc77a8de6..289ee65f7ccd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -435,8 +435,8 @@ namespace BaskerNS for(Int l = 0; l < lvl; l++) { printf("OPS. KID : %d LVL: %d OPS : %d \n", - kid, l, thread_array[kid].ops_counts[l][0]); - thread_array[kid].ops_count[1][0] = 0; + kid, l, thread_array(kid).ops_counts[l][0]); + thread_array(kid).ops_count[1][0] = 0; } #endif @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -493,7 +493,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); //printf("Using temp matrix, kid: %d\n", kid); //Bp->print(); } @@ -613,7 +613,7 @@ namespace BaskerNS //Count ops to show imbalance #ifdef BASKER_COUNT_OPS - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif //WE SHOUD DO A UNNZ COUNT @@ -878,9 +878,9 @@ namespace BaskerNS #endif #ifdef BASKER_2DL - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only works with 2D layout"); #endif @@ -959,12 +959,12 @@ namespace BaskerNS ) { - Int b = S[l][kid]; - BASKER_MATRIX &L = LL[b][0]; - 
INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[team_leader].ews; - Int ws_size = thread_array[kid].iws_size; - Int ews_size = thread_array[team_leader].ews_size; + Int b = S(l)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(team_leader).ews; + Int ws_size = thread_array(kid).iws_size; + Int ews_size = thread_array(team_leader).ews_size; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid>3) @@ -1237,7 +1237,7 @@ namespace BaskerNS #endif #ifdef BASKER_OPS_COUNT - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif t_back_solve(kid, lvl,l+1, k, top, xnnz); // note: l not lvl given @@ -1868,7 +1868,6 @@ namespace BaskerNS if(kid != team_leader) { - //LL[my_idx][blk].p_size = 0; LL(my_idx)(blk).p_size = 0; } else @@ -1877,7 +1876,6 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - //LL[leader_idx][blk].p_size = p_sizeL; LL(leader_idx)(blk).p_size = p_sizeL; } p_size = 0; @@ -2035,7 +2033,6 @@ namespace BaskerNS if(kid != team_leader) { - //LL[my_idx][blk].p_size = 0; LL(my_idx)(blk).p_size = 0; } else @@ -2044,7 +2041,6 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - //LL[leader_idx][blk].p_size = p_sizeL; LL(leader_idx)(blk).p_size = p_sizeL; } p_size = 0; @@ -2104,7 +2100,7 @@ namespace BaskerNS else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; @@ -2181,7 +2177,7 @@ namespace BaskerNS const Int ws_size = LL(leader_idx)(bl).ews_size; const Int brow = LL(leader_idx)(bl).srow; const Int nrow = LL(leader_idx)(bl).nrow; - Int p_size = LL[leader_idx][bl].p_size; + Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk //Need better sparse update @@ -2248,7 +2244,6 @@ namespace BaskerNS printf("SETTING move_over set 0, L: %d %d kid: %d \n", 
leader_idx, bl, kid); #endif - //LL[leader_idx][bl].p_size = 0; LL(leader_idx)(bl).p_size = 0; p_count =0; } @@ -2261,7 +2256,6 @@ namespace BaskerNS printf("SETTING Re-pop pattern: %d %d size: %d \n", leader_idx, bl, p_count); #endif - //LL[leader_idx][bl].p_size = p_count; LL(leader_idx)(bl).p_size = p_count; } @@ -2334,7 +2328,7 @@ namespace BaskerNS else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; @@ -2345,17 +2339,11 @@ namespace BaskerNS //B.print(); team_leader = find_leader(kid, l); - //ENTRY_1DARRAY X = LL[team_leader][bl].ews; ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - //INT_1DARRAY ws = LL[team_leader][bl].iws; INT_1DARRAY ws = LL(leader_idx)(bl).iws; - //Int brow = LL[team_leader][bl].srow; - //Int nrow = LL[team_leader][bl].nrow; const Int brow = LL(leader_idx)(bl).srow; const Int nrow = LL(leader_idx)(bl).nrow; - //Int p_size = LL[team_leader][bl].p_size; Int p_size = LL(leader_idx)(bl).p_size; - //Int ws_size = LL[team_leader][bl].iws_size; const Int ws_size = LL(leader_idx)(bl).iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -2431,18 +2419,12 @@ namespace BaskerNS Int A_col = S(lvl)(kid); Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); Int CM_idx = kid; - //ENTRY_1DARRAY X = LL[team_leader][bl].ews; ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - //INT_1DARRAY ws = LL[team_leader][bl].iws; INT_1DARRAY ws = LL(leader_idx)(bl).iws; - //Int ws_size =LL[team_leader][bl].ews_size; - const Int ws_size =LL(leader_idx)(bl).ews_size; - //Int brow = LL[team_leader][bl].srow; + const Int ws_size = LL(leader_idx)(bl).ews_size; const Int brow = LL(leader_idx)(bl).srow; - //Int nrow = LL[team_leader][bl].nrow; const Int nrow = LL(leader_idx)(bl).nrow; - //Int p_size = LL[team_leader][bl].p_size; - Int p_size = LL[leader_idx][bl].p_size; + Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk //Need better sparse update @@ -2511,7 +2493,6 
@@ namespace BaskerNS printf("SETTING move_over set 0, L: %d %d kid: %d \n", leader_idx, bl, kid); #endif - //LL[leader_idx][bl].p_size = 0; LL(leader_idx)(bl).p_size = 0; p_count =0; } @@ -2521,7 +2502,6 @@ namespace BaskerNS printf("SETTING Re-pop pattern: %d %d size: %d \n", leader_idx, bl, p_count); #endif - //LL[leader_idx][bl].p_size = p_count; LL(leader_idx)(bl).p_size = p_count; } @@ -2549,7 +2529,7 @@ namespace BaskerNS Int CM_idx = kid; BASKER_MATRIX_VIEW &B = AV[A_col][A_row]; - B.flip_base(&(thread_array[kid].C)); + B.flip_base(&(thread_array(kid).C)); B.k_offset = k; if(kid == 0) @@ -2630,8 +2610,8 @@ namespace BaskerNS /* Old Atomic Barrier BaskerBarrier BB; - BB.Barrier(thread_array[leader_kid].token[sublvl][function_n], - thread_array[leader_kid].token[sublvl][1], + BB.Barrier(thread_array(leader_kid).token[sublvl][function_n], + thread_array(leader_kid).token[sublvl][1], size); */ } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp index 801ad2ee6362..5e9345ed02ec 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp @@ -886,7 +886,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? 
- BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 1425385d9f2e..c6ddadf55092 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -654,7 +654,7 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -664,7 +664,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); } BASKER_MATRIX &B = *Bp; //if(kid ==0) @@ -2471,7 +2471,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; @@ -2592,7 +2592,7 @@ namespace BaskerNS //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp index 69d06a6bd72e..82ea04be3754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp @@ -1096,11 +1096,19 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2) find_2D_convert(BTF_A); //now we can 
fill submatrices #ifdef BASKER_KOKKOS - kokkos_order_init_2D iO(this); - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this); + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + bool alloc = true; + //bool keep_zeros = true; + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else - //Comeback + //Comeback #endif #ifdef BASKER_TIMER double init_2d_time = scotch_timer.seconds(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index cc20d3b21e78..c955ff952551 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -117,9 +117,11 @@ namespace BaskerNS // thread.team_rank()); Int kid = basker->t_get_kid(thread); #endif + printf( " * kokkos_sfactor_init_factor(%d) *\n",kid ); fflush(stdout); basker->t_init_factor(kid); + printf( " * kokkos_sfactor_init_factor(%d) done *\n",kid ); fflush(stdout); //This needs to be done earlier in ordering now //basker->t_init_2DA(kid); @@ -159,7 +161,7 @@ int Basker::sfactor() printf("Total NNZ: %ld \n", (long)global_nnz); printf(" > blk_matching = %d\n", (int)Options.blk_matching ); printf("----------------------------------\n"); - printf("\n"); + printf("\n"); fflush(stdout); } } @@ -169,28 +171,45 @@ int Basker::sfactor() } //Allocate Factorspace - //printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", - // btf_tabs_offset,allocate_nd_workspace); + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", + btf_tabs_offset,allocate_nd_workspace); fflush(stdout); + #endif if(btf_tabs_offset != 0 && allocate_nd_workspace) { #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_FACTOR 
kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif #else #endif } + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_workspace <<\n"); fflush(stdout); + #endif //if(btf_tabs_offset != 0) { //Allocate workspace #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_WORKSPACE typedef Kokkos::TeamPolicy TeamPolicy; kokkos_sfactor_init_workspace iWS(setup_flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(setup_flag, p); + } + #endif #endif } @@ -266,8 +285,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.ncol > 0, "Basker symmetric_sfactor assert: A.ncol malloc > 0 failed"); - MALLOC_INT_1DARRAY(gScol[ii], A.ncol); - init_value(gScol[ii], A.ncol, (Int)0); + MALLOC_INT_1DARRAY(gScol(ii), A.ncol); + init_value(gScol(ii), A.ncol, (Int)0); } @@ -279,8 +298,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.nrow > 0, "sfactor A.nrow malloc"); - MALLOC_INT_1DARRAY(gSrow[ii], A.nrow); - init_value(gSrow[ii], A.nrow, (Int)0); + MALLOC_INT_1DARRAY(gSrow(ii), A.nrow); + init_value(gSrow(ii), A.nrow, (Int)0); } #ifdef BASKER_TIMER @@ -292,7 +311,9 @@ int Basker::sfactor() double time2 = 0.0; double time3 = 0.0; Kokkos::Timer timer1; + Kokkos::Timer timer2; timer.reset(); + timer2.reset(); #endif //split_num = num_threads/2; @@ -303,7 +324,7 @@ int Basker::sfactor() printf("\n --------------- OVER DOMS ---------------\n"); printf("\n"); } - #define SHYLU_BASKER_STREE_LIST + //#define SHYLU_BASKER_STREE_LIST std::vector stree_list (num_threads); #ifdef SHYLU_BASKER_STREE_LIST Kokkos::parallel_for( @@ -323,7 +344,7 @@ int Basker::sfactor() //printf("\n\n STREE SIZE: %d \n", AL[blk][0].ncol); //printf("Here 0\n"); //Find nnz_counts for leafs - #ifdef BASKER_TIMER + #if 
defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST @@ -332,7 +353,7 @@ int Basker::sfactor() #else e_tree (ALM(blk)(0), stree, 1); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); #endif @@ -341,7 +362,7 @@ int Basker::sfactor() #else post_order(ALM(blk)(0), stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif @@ -350,19 +371,19 @@ int Basker::sfactor() #else col_count (ALM(blk)(0), stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif //Assign nnz here - //leaf_assign_nnz(LL[blk][0], stree, 0); - //leaf_assign_nnz(LU[blk][LU_size[blk]-1], stree, 0); + //leaf_assign_nnz(LL(blk)(0), stree, 0); + //leaf_assign_nnz(LU(blk)(LU_size[blk]-1), stree, 0); if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); - printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); + printf( " >> leaf_assign_nnz(LU(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST @@ -372,7 +393,7 @@ int Basker::sfactor() leaf_assign_nnz(LL(blk)(0), stree, 0); leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); #endif } @@ -380,6 +401,10 @@ int Basker::sfactor() ); Kokkos::fence(); #endif + #ifdef BASKER_TIMER + double dom_time = timer2.seconds(); + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl << std::endl; + #endif for(Int p = 0; p < num_threads; ++p) { @@ -411,16 +436,16 @@ int Basker::sfactor() Int off_diag = 1; //printf( " 
U_blk_sfactor(AVM(%d,%d))\n",U_col,U_row ); //U_blk_sfactor(AV[U_col][U_row], stree, - // gScol[l], gSrow[glvl],0); + // gScol(l), gSrow(glvl),0); #ifdef BASKER_TIMER timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM(U_col)(U_row), stree_p, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #else U_blk_sfactor(AVM(U_col)(U_row), stree, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER time3 += timer1.seconds(); @@ -435,18 +460,17 @@ int Basker::sfactor() // stree, gScol, gSrow); //Assign nnz counts for leaf off-diag - //U_assign_nnz(LU[U_col][U_row], stree, 0); - //L_assign_nnz(LL[blk][l+1], stree, 0); - if(Options.verbose == BASKER_TRUE) - { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)blk,(int)l+1); - } + //U_assign_nnz(LU(U_col)(U_row), stree, 0); + //L_assign_nnz(LL(blk)(l+1), stree, 0); #ifdef BASKER_TIMER timer1.reset(); #endif - //printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; + if(Options.verbose == BASKER_TRUE) + { + printf( " ++ U_assign_nnz(LU(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)U_col,(int)U_row, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + } #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); @@ -465,7 +489,7 @@ int Basker::sfactor() std::cout << " >> symmetric_sfactor::domain : " << timer.seconds() << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::postorder : " << time1_2 << " + " << time1_3 << " + " << time1 << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::init : " << time2 << " seconds" << std::endl; - std::cout << " ++ 
symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl; + std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl << std::endl; timer.reset(); #endif @@ -484,13 +508,17 @@ int Basker::sfactor() //over all the seps in a lvle #ifdef SHYLU_BASKER_STREE_LIST + //printf( " parallel for \n" ); Kokkos::parallel_for( "permute_col", p, KOKKOS_LAMBDA(const int pp) #else + //printf( " serial for \n" ); for(Int pp = 0; pp < p; pp++) #endif { - //printf( " -- level = %d separator = %d --\n",lvl,pp ); + #ifdef BASKER_TIMER + printf( " -- level = %d/%d separator = %d/%d --\n",lvl,tree.nlvls, pp,p ); fflush(stdout); + #endif //S blks Int ppp; ppp = pp*pow(tree.nparts, lvl+1); @@ -509,9 +537,11 @@ int Basker::sfactor() Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, - //gScol[lvl], gSrow[pp]); + //gScol(lvl), gSrow(pp)); - //printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); fflush(stdout); + #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; S_blk_sfactor(ALM(U_col)(U_row), stree_p, @@ -520,29 +550,34 @@ int Basker::sfactor() S_blk_sfactor(ALM(U_col)(U_row), stree, gScol(lvl), gSrow(pp)); #endif - //printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); fflush(stdout); + #endif - //S_assign_nnz(LL[U_col][U_row], stree, 0); + //S_assign_nnz(LL(U_col)(U_row), stree, 0); if(Options.verbose == BASKER_TRUE) { - printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); + printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST S_assign_nnz(LL(U_col)(U_row), stree_p, 0); #else 
S_assign_nnz(LL(U_col)(U_row), stree, 0); #endif - //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); + //S_assign_nnz(LU(U_col)(LU_size[U_col]-1), stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { - printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); + printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); #else S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); #endif + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",LU(U_col)(LU_size(U_col)-1).nnz); fflush(stdout); + #endif } #ifdef SHYLU_BASKER_STREE_LIST ); @@ -563,6 +598,7 @@ int Basker::sfactor() Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { + //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); U_col = S(l+1)(ppp); U_row = S(lvl+1)(ppp)%LU_size(U_col); @@ -594,12 +630,13 @@ int Basker::sfactor() //Assign nnz + double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; if(Options.verbose == BASKER_TRUE) { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + printf( " ++ leaf_assign_nnz(LU(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)U_col,(int)U_row, (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ leaf_assign_nnz(LL(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)inner_blk,(int)(l-lvl), (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + fflush(stdout); } - double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); @@ -619,12 +656,15 @@ int Basker::sfactor() for(Int ii = 0 ; ii < split_num; ++ii) { //printf("split\n"); - FREE(gScol[ii]); - FREE(gSrow[ii]); + FREE(gScol(ii)); 
+ FREE(gSrow(ii)); } FREE(gScol); FREE(gSrow); + #ifdef BASKER_TIMER + std::cout << " >> symmetric_sfactor done << " << std::endl; + #endif return 0; }//end symmetric_symbolic() @@ -1151,7 +1191,6 @@ int Basker::sfactor() BASKER_SYMBOLIC_TREE &ST ) { -printf( " col_count:: view \n" ); //Still like to find a way to do this without transpose BASKER_MATRIX Mt; matrix_transpose(MV, Mt); @@ -2220,6 +2259,9 @@ printf( " col_count:: view \n" ); Int option ) { + #ifdef BASKER_TIMER + printf("leaf_assign_nnz:\n"); + #endif if(option == 0) { const Int Int_MAX = std::numeric_limits::max(); @@ -2228,19 +2270,23 @@ printf( " col_count:: view \n" ); for(Int i = 0; i < M.ncol; i++) { if (t_nnz <= Int_MAX - ST.col_counts[i]) { + #ifdef BASKER_TIMER + //printf( " > %d: %d += %d\n",i,t_nnz, ST.col_counts[i] ); + #endif t_nnz += ST.col_counts[i]; } else { // let's just hope it is enough, if overflow break; } } - #ifdef BASKER_DEBUG_SFACTOR - printf("leaf nnz: %ld \n", (long)t_nnz); + #ifdef BASKER_TIMER + printf(" > leaf nnz: (%ld + %ld) / 2 = %ld\n", (long)t_nnz,(long)M.ncol,(long)(t_nnz+M.ncol)/2); #endif + t_nnz = long(t_nnz+M.ncol)/2; //double nnz_shoulder = 1.05; double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; // used to boost fill estimate - Int temp = fill_factor*t_nnz; + Int temp = fill_factor*t_nnz; // assuming (t_nnz/2) as triangular part if (temp > t_nnz) { M.nnz = temp; } else { @@ -2258,8 +2304,8 @@ printf( " col_count:: view \n" ); } if(Options.verbose == BASKER_TRUE) { - printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld) with fill-factor x(%d+%f = %f)\n", + (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol,(int)BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } } }//end assign_leaf_nnz @@ -2290,12 +2336,12 @@ printf( " col_count:: view \n" ); } } 
- #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("U_assing_nnz: %ld \n", t_nnz); #endif //double fill_factor = 1.05; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2312,8 +2358,8 @@ printf( " col_count:: view \n" ); #endif if(Options.verbose == BASKER_TRUE) { - printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f + %f = %f), M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); } } }//end assign_upper_nnz @@ -2344,13 +2390,13 @@ printf( " col_count:: view \n" ); } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("L_assign_nnz: %ld \n", t_nnz); #endif // double fill_factor = 2.05; double old_nnz = M.nnz; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2367,8 +2413,8 @@ printf( " col_count:: view \n" ); } if(Options.verbose == BASKER_TRUE) { - printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %e + %e = %e), M.nnz = %ld -> %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); + printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld -> %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); } } }//end assign_lower_nnz @@ -2419,6 +2465,9 @@ printf( " col_count:: view \n" ); //printf("number of blks: %d \n", // btf_nblks-btf_tabs_offset); #endif + #ifdef BASKER_TIMER + printf( " > btf_last_dense(%s) <\n",(flag ? 
"true" : "false") ); fflush(stdout); + #endif Int max_blk_size = 0; #if defined(BASKER_SPLIT_A) @@ -2439,7 +2488,9 @@ printf( " col_count:: view \n" ); if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif L_D(i).init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2450,6 +2501,9 @@ printf( " col_count:: view \n" ); //For pruning L_D(i).init_pend(); + #ifdef BASKER_TIMER + printf( " U_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif U_D(i).init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2459,6 +2513,9 @@ printf( " col_count:: view \n" ); }//over all blks } #endif + #ifdef BASKER_TIMER + printf( " > top blocks done <\n" ); fflush(stdout); + #endif //Malloc L and U #ifdef BASKER_DEBUG_SFACTOR @@ -2486,7 +2543,9 @@ printf( " col_count:: view \n" ); if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif LBTF(i-btf_tabs_offset).init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2498,7 +2557,9 @@ printf( " col_count:: view \n" ); //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); LBTF(i-btf_tabs_offset).init_pend(); - //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif UBTF(i-btf_tabs_offset).init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2511,6 +2572,9 @@ printf( " col_count:: view \n" ); //MALLOC workspace }//over all blks } + #ifdef BASKER_TIMER + printf( " > left blocks done <\n" ); 
fflush(stdout); + #endif //JDB: This needs to be fixed max_blk_size = BTF_D.nrow + BTF_C.nrow; @@ -2530,23 +2594,27 @@ printf( " col_count:: view \n" ); //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); - if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); - } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("Malloc Thread: %d iws: %d \n", i, (thread_array(i).iws_size* thread_array(i).iws_mult)); - printf("Malloc Thread: %d ews: %d \n", + printf("Malloc Thread: %d ews: %d \n", i, (thread_array(i).ews_size* thread_array(i).ews_mult)); #endif + if (max_blk_size > 0) { + MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); + } } } + #ifdef BASKER_TIMER + printf( " > btf_last_dense done <\n" ); + #endif }//end btf_last_dense() }//end namespace Bakser +#undef BASKER_TIMER #endif//endif BASKER_SFACTOR_NEWFRM_HPP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 64c041a6536c..622bdf39a0fd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -100,8 +100,8 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM(blk)(0), - LL(blk)(0), + sfactor_nd_dom_estimate(ALM(blk)(0), + LL(blk)(0), LU(blk)(LU_size(blk)-1)); for(Int l=0; l < tree.nlvls; l++) @@ -156,7 +156,7 @@ namespace BaskerNS U_row = 
S(lvl+1)(ppp)%LU_size(U_col); if((S(lvl+1)(ppp) > 14) && - (S(lvl+1)(ppp) > LU_size(U_col)) + (S(lvl+1)(ppp) > LU_size(U_col)) ) { Int tm = (S(lvl+1)(ppp)+1)/16; @@ -172,7 +172,7 @@ namespace BaskerNS sfactor_nd_sep_lower_estimate( ALM(innerblk)(l-lvl), - LL(innerblk)(l-lvl)); + LL(innerblk)(l-lvl)); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp index 995bad188542..c7f804794f67 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp @@ -148,8 +148,8 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - MATRIX &myL = LL[l][0]; - stats.Lnnz += LL[l][0].nnz; + MATRIX &myL = LL(l)(0); + stats.Lnnz += LL(l)(0).nnz; }//over all Ls return stats.Lnnz; @@ -166,10 +166,10 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - for(Int r=0; r 0) { FREE_INT_1DARRAY(roots); @@ -267,7 +265,7 @@ namespace BaskerNS ~basker_symbolic_tree() { - //Finalize(); + Finalize(); }//end ~basker_symbolic_tree BASKER_INLINE diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index be4c146e9c83..784df704eb59 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -118,7 +118,7 @@ namespace BaskerNS for(Int i =0; i < tree.nblks+1; i++) { BASKER_ASSERT(num_threads > 0, "tree num_threads"); - MALLOC_INT_1DARRAY(S[i], num_threads); + MALLOC_INT_1DARRAY(S(i), num_threads); } //this will want to be across all threads @@ -335,7 +335,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -356,7 +356,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << 
S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -368,11 +368,11 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -592,7 +592,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -611,7 +611,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -624,10 +624,10 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -855,11 +855,11 @@ namespace BaskerNS #endif for(Int j=i; j != -flat.ncol; j=tree.treetab[j]) { - MATRIX_1DARRAY &UMtemp = AVM[j]; - MATRIX_1DARRAY &LMtemp = ALM[i]; + MATRIX_1DARRAY &UMtemp = AVM(j); + MATRIX_1DARRAY &LMtemp = ALM(i); - MATRIX_1DARRAY &LUtemp = LU[j]; - MATRIX_1DARRAY &LLtemp = LL[i]; + MATRIX_1DARRAY &LUtemp = LU(j); + MATRIX_1DARRAY &LLtemp = LL(i); #ifdef MY_DEBUG printf( " AVM(%d)(%d).set_shape(%dx%d)\n",j,U_view_count[j], tree.col_tabs[i+1]-tree.col_tabs[i],tree.col_tabs[j+1]-tree.col_tabs[j] ); @@ -1322,9 +1322,15 @@ namespace BaskerNS #ifdef BASKER_KOKKOS BASKER_BOOL keep_zeros = BASKER_FALSE; BASKER_BOOL alloc = alloc_BTFA; //BASKER_FALSE; - kokkos_order_init_2D iO(this, alloc, 
keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else //Comeback #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 6009e346f73b..f57447b10906 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -90,7 +90,7 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_INC_TOL_VALUE 0.0001 //MACRO INC FILL (this will become dynamic in the future) -#define BASKER_FILL_USER 1.00 +#define BASKER_FILL_USER 0.00 #define BASKER_FILL_LESTIMATE 1.50 #define BASKER_FILL_UESTIMATE 1.50 #define BASKER_FILL_LLOWERESTIMATE 2.00 @@ -144,17 +144,17 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_KOKKOS_NOINIT Kokkos::ViewAllocateWithoutInitializing #define INT_RANK2DARRAY Kokkos::View #define INT_1DARRAY Kokkos::View -#define INT_2DARRAY Kokkos::View #define ENTRY_1DARRAY Kokkos::View -#define ENTRY_2DARRAY Kokkos::View #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY Kokkos::View -#define MATRIX_1DARRAY Kokkos::View -#define MATRIX_2DARRAY Kokkos::View -#define MATRIX_VIEW_1DARRAY Kokkos::View -#define MATRIX_VIEW_2DARRAY Kokkos::View -#define THREAD_1DARRAY Kokkos::View -#define THREAD_2DARRAY Kokkos::View + +#define INT_2DARRAY Kokkos::View +#define ENTRY_2DARRAY Kokkos::View +#define MATRIX_1DARRAY Kokkos::View +#define MATRIX_2DARRAY Kokkos::View +#define MATRIX_VIEW_1DARRAY Kokkos::View +#define MATRIX_VIEW_2DARRAY Kokkos::View +#define THREAD_1DARRAY Kokkos::View #define 
INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +163,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -172,7 +172,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -181,7 +181,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1); \ + Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } @@ -189,7 +189,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_2DARRAY("int_2d",s); \ + a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -198,7 +198,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -207,7 +207,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_2DARRAY("entry_2d",s); \ + a = ENTRY_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -216,7 +216,7 @@ enum BASKER_INCOMPLETE_CODE 
{ \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -225,7 +225,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_2DARRAY("bool_2d", s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -234,7 +234,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_1DARRAY("matrix_1d",s); \ + a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -243,7 +243,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_2DARRAY("matrix_2d",s); \ + a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,7 +252,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s); \ + a = MATRIX_VIEW_1DARRAY(Kokkos::view_alloc("matrix_view_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -261,7 +261,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s); \ + a = MATRIX_VIEW_2DARRAY(Kokkos::view_alloc("matrix_view_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -270,33 +270,12 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to 
alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_1DARRAY("thread_1d",s); \ + a = THREAD_1DARRAY(Kokkos::view_alloc("thread_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ } -#define MALLOC_THREAD_2DARRAY(a,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ - if (s > 0) { \ - a = THREAD_2DARRAY("thread_2d",s); \ - if(a.data() == NULL) \ - throw std::bad_alloc(); \ - } \ - } -//RESIZE (with copy) -#define RESIZE_1DARRAY(a,os,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT RESIZE 1D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s); \ - } -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) \ - { \ - BASKER_ASSERT(s1 >= 0 && s2 >= 0, "BASKER ASSERT RESIZE 2D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s1,s2); \ - } -#define RESIZE_INT_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) -#define RESIZE_ENTRY_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) + //REALLOC (no copy) #define REALLOC_1DARRAY(a,os,s) \ { \ @@ -310,6 +289,7 @@ enum BASKER_INCOMPLETE_CODE } #define REALLOC_INT_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) #define REALLOC_ENTRY_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) + //Set values #define SET_INT_1DARRAY(a, b, s) \ { \ @@ -334,77 +314,73 @@ enum BASKER_INCOMPLETE_CODE #define FREE(a) BASKER_NO_OP -#define FREE_INT_1DARRAY_PAIRS(a) \ - { \ - a = INT_1DARRAY_PAIRS(); \ +#define FREE_INT_1DARRAY_PAIRS(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_INT_1DARRAY(a) \ - { \ - a = INT_1DARRAY(); \ +#define FREE_INT_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_INT_RANK2DARRAY(a) \ - { \ - a = INT_RANK2DARRAY(); \ +#define FREE_INT_RANK2DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_INT_2DARRAY(a,n) \ - { \ - a = INT_2DARRAY(); \ +#define FREE_INT_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_ENTRY_1DARRAY(a) \ - { \ - a = ENTRY_1DARRAY(); \ +#define FREE_ENTRY_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define 
FREE_ENTRY_2DARRAY(a,n) \ - { \ - a = ENTRY_2DARRAY(); \ +#define FREE_ENTRY_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_1DARRAY(a) \ - { \ - a = BOOL_1DARRAY(); \ +#define FREE_BOOL_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_2DARRAY(a,n) \ - { \ - a = BOOL_2DARRAY(); \ +#define FREE_BOOL_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_1DARRAY(a) \ - { \ - a = MATRIX_1DARRAY(); \ +#define FREE_MATRIX_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_2DARRAY(a,n) \ - { \ - a = MATRIX_2DARRAY(); \ +#define FREE_MATRIX_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ - { \ - a = MATRIX_VIEW_1DARRAY(); \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ - { \ - a = MATRIX_VIEW_2DARRAY(); \ +#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } #define FREE_THREAD_1DARRAY(a) \ - { \ - a = THREAD_1DARRAY(); \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - a = TRHEAD_2DARRAY(); \ - } +#else // not BASKER_KOKKOS -#else //Execution Space #define BASKER_EXE_SPACE void* //ReMacro Basker Classes @@ -428,7 +404,6 @@ enum BASKER_INCOMPLETE_CODE #define MATRIX_VIEW_1DARRAY BASKER_MATRIX_VIEW* #define MATRIX_VIEW_2DARRAY BASKER_MATRIX_VIEW** #define THREAD_1DARRAY BASKER_THREAD* -#define THREAD_2DARRAY BASKER_THREAD** //Macro Memory Calls //Malloc @@ -443,12 +418,6 @@ enum BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -#define MALLOC_THREAD_2DARRAY(a,s) a = new THREAD_1DARRAY [s] -//Resize (copy old data) (come back and add) -#define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP -#define RESIZE_INT_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_ENTRY_1DARRAY(a,os,s) 
BASKER_NO_OP //Realloc (dont copy old data) #define REALLOC_1DARRAY(a,os,s) BASKER_NO_OP #define REALLOC_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP @@ -525,13 +494,6 @@ enum BASKER_INCOMPLETE_CODE FREE(a); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - for(BASKER_INT MACRO_I = 0; MACRO_I < s; MACRO_I++) \ - FREE(a[MACRO_I]); \ - FREE(a); \ - } - #endif //end ifdef BASKER_KOKKOS //Inline command diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 130f62ea6127..2d8322c05de2 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -252,11 +252,11 @@ namespace BaskerNS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -291,12 +291,11 @@ namespace BaskerNS #ifdef BASKER_KOKKOS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; - Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + Kokkos::parallel_for(TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -365,7 +364,7 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif LL(b)(row).clear_pend(); @@ -384,7 +383,7 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); 
#endif //LU(b)(LU_size(b)-1).nnz = 0; @@ -417,7 +416,7 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) @@ -455,8 +454,8 @@ namespace BaskerNS Kokkos::Timer timer_init_matrixL; Kokkos::Timer timer_fill_matrixL; timer_initL.reset(); + printf( " > t_init_factor( tid = %d, nlvls = %d ) <\n",kid,tree.nlvls+1 ); fflush(stdout); #endif - //printf( " > t_init_factor( tid = %d ) <\n",kid ); for(Int lvl = 0; lvl < tree.nlvls+1; lvl++) { if(kid%((Int)pow(2,lvl)) == 0) @@ -467,13 +466,13 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif #ifdef BASKER_TIMER timer_init_matrixL.reset(); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); fflush(stdout); #endif - //printf( " lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); LL(b)(row).init_matrix("Loffdig", LL(b)(row).srow, LL(b)(row).nrow, @@ -481,6 +480,7 @@ namespace BaskerNS LL(b)(row).ncol, LL(b)(row).nnz); #ifdef BASKER_TIMER + printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); #endif @@ -491,15 +491,19 @@ namespace BaskerNS } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); + printf( " ++ zero out (%d) ++\n",int(LL(b)(row).col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - Kokkos::deep_copy(LL(b)(row).col_ptr, 0); + LL(b)(row).init_ptr(); + //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - //printf( " LL(%d)(%d).init_pend(ncol = 
%d)\n",b,row,LL(b)(row).ncol ); LL(b)(row).init_pend(); - + #ifdef BASKER_TIMER + printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); + #endif }//end over all row }//end select which thread }//end for over all lvl @@ -508,6 +512,7 @@ namespace BaskerNS std::cout << " > Basker t_init_factor::initL(" << kid << "): time: " << initL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::initMatrix(" << kid << "): time: " << init_matrixL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::fillMatrix(" << kid << "): time: " << fill_matrixL_time << std::endl; + fflush(stdout); #endif //U @@ -524,10 +529,14 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif - //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); + #ifdef BASKER_TIMER + printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, + (int)LU(b)(LU_size(b)-1).nrow,(int)LU(b)(LU_size(b)-1).ncol,(int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz, + (int)LU(b)(LU_size(b)-1).srow,(int)LU(b)(LU_size(b)-1).scol); + #endif LU(b)(LU_size(b)-1).init_matrix("Udiag", LU(b)(LU_size(b)-1).srow, LU(b)(LU_size(b)-1).nrow, @@ -536,7 +545,8 @@ namespace BaskerNS LU(b)(LU_size(b)-1).nnz); //LU(b)(LU_size(b)-1).fill(); - Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); + LU(b)(LU_size(b)-1).init_ptr(); + //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) { @@ -573,10 +583,15 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif - //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, 
(int)LU(U_col)(U_row).mnnz); + #ifdef BASKER_TIMER + printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, + (int)LU(U_col)(U_row).nrow,(int)LU(U_col)(U_row).ncol, + (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz, + (int)LU(U_col)(U_row).srow,(int)LU(U_col)(U_row).scol); + #endif LU(U_col)(U_row).init_matrix("Uoffdiag", LU(U_col)(U_row).srow, LU(U_col)(U_row).nrow, @@ -585,7 +600,8 @@ namespace BaskerNS LU(U_col)(U_row).nnz); //LU(U_col)(U_row).fill(); - Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); + LU(U_col)(U_row).init_ptr(); + //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { @@ -775,7 +791,7 @@ namespace BaskerNS { //printf(" %d: Using BTF AVM(%d,%d), %dx%d\n",kid,U_col,U_row, AVM(U_col)(U_row).nrow,AVM(U_col)(U_row).ncol); //printf("2nd convert AVM: %d %d size:%d kid: %d\n", - // U_col, U_row, AVM(U_col)(U_row).nnz, + // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); @@ -859,19 +875,20 @@ namespace BaskerNS //printf( " kid=%d :: LL(%d, %d).fill\n",kid, b,l ); //LL(b)(l).fill(); - Kokkos::deep_copy(LL(b)(l).col_ptr, 0); + LL(b)(l).init_ptr(); + //Kokkos::deep_copy(LL(b)(l).col_ptr, 0); if(l==0) { //Also workspace matrix //This could be made smaller //printf("C: size: %d kid: %d \n", - // iws_size, kid); + // iws_size, kid); - //thread_array[kid].C.init_matrix("cwork", - // 0, iws_size, - // 0, 2, - // iws_size*2); + //thread_array(kid).C.init_matrix("cwork", + // 0, iws_size, + // 0, 2, + // iws_size*2); } } //end for l } @@ -888,19 +905,19 @@ namespace BaskerNS { // if any left over for BLK factorization if(Options.btf == BASKER_TRUE) { - Int iws_mult = thread_array[kid].iws_mult; - Int iws_size = thread_array[kid].iws_size; - Int ews_mult = thread_array[kid].ews_mult; - Int ews_size = thread_array[kid].ews_size; + Int iws_mult = thread_array(kid).iws_mult; 
+ Int iws_size = thread_array(kid).iws_size; + Int ews_mult = thread_array(kid).ews_mult; + Int ews_size = thread_array(kid).ews_size; for(Int i=0; i < iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0.0; + thread_array(kid).ews[i] = 0.0; } } } @@ -920,14 +937,14 @@ namespace BaskerNS } } printf("init_workspace 1d, kid: %d size: %d %d %d %d \n", - kid, iws_mult, iws_size, ews_mult, ews_size); + kid, iws_mult, iws_size, ews_mult, ews_size); for(Int i=0; i< iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0; + thread_array(kid).ews[i] = 0; } #endif //endif def basker_2dl //return 0; @@ -995,7 +1012,7 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - BASKER_MATRIX &myL = LL[l][0]; + BASKER_MATRIX &myL = LL(l)(0); for(Int k = 0; k < myL.ncol; k++) { @@ -1033,13 +1050,13 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k=0; k < LL[l][0].ncol; k++) + for(Int k=0; k < LL(l)(0).ncol; k++) { - fprintf(fp, "k=%ld \n", (long)k+LL[l][0].scol); + fprintf(fp, "k=%ld \n", (long)k+LL(l)(0).scol); for(Int r = 0; r < LL_size[l]; r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); for(Int j = myL.col_ptr[k]; j < myL.col_ptr[k+1]; j++) { fprintf(fp, "(%ld , %ld , %ld, %ld, %ld) %g , ", @@ -1109,12 +1126,12 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k=0; k < LL[l][0].ncol; k++) + for(Int k=0; k < LL(l)(0).ncol; k++) { - //fprintf(fp, "k=%d \n", k+LL[l][0].scol); + //fprintf(fp, "k=%d \n", k+LL(l)(0).scol); for(Int r = 0; r < LL_size[l]; r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); for(Int j = myL.col_ptr[k]; j < myL.col_ptr[k+1]; j++) { @@ -1167,12 +1184,12 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; 
l++) { //over each column - for(Int k = 0; k < LU[l][0].ncol; k++) + for(Int k = 0; k < LU(l)(0).ncol; k++) { //over each row of U for(Int r = 0; r < LU_size[l]; r++) { - BASKER_MATRIX &myU = LU[l][r]; + BASKER_MATRIX &myU = LU(l)(r); //over each nnz in column (k) of local U for(Int j = myU.col_ptr[k]; j < myU.col_ptr[k+1]; j++) @@ -1196,7 +1213,7 @@ namespace BaskerNS Int nblks = btf_nblks-btf_tabs_offset; for(Int i =0; i < nblks; i++) { - BASKER_MATRIX &myU = UBTF[i]; + BASKER_MATRIX &myU = UBTF(i); for(Int k = 0; k < myU.ncol; k++) { for(Int j = myU.col_ptr[k]; j< myU.col_ptr[k+1]; j++) @@ -1230,14 +1247,14 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k = 0; k < LU[l][0].ncol; k++) + for(Int k = 0; k < LU(l)(0).ncol; k++) { - fprintf(fp, "k=%ld \n", (long)k+LU[l][0].scol); + fprintf(fp, "k=%ld \n", (long)k+LU(l)(0).scol); //over each row of U for(Int r = 0; r < LU_size[l]; r++) { - BASKER_MATRIX &myU = LU[l][r]; + BASKER_MATRIX &myU = LU(l)(r); //over each nnz in column (k) of local U for(Int j = myU.col_ptr[k]; j < myU.col_ptr[k+1]; j++) @@ -1261,7 +1278,7 @@ namespace BaskerNS Int nblks = btf_nblks-btf_tabs_offset; for(Int i =0; i < nblks; i++) { - BASKER_MATRIX &myU = UBTF[i]; + BASKER_MATRIX &myU = UBTF(i); for(Int k = 0; k < myU.ncol; k++) { fprintf(fp, "k=%ld \n", (long)k+myU.scol); @@ -1304,7 +1321,7 @@ namespace BaskerNS fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(fp, "%%Generated by **Basker** \n"); fprintf(fp, "%%Starting Row %ld Starting Col %ld \n", - (long)M.srow, (long)M.scol); + (long)M.srow, (long)M.scol); fprintf(fp, "%ld %ld %ld \n", (long)M.nrow, (long)M.ncol, (long)M.nnz); Int bcol=M.scol; @@ -1334,7 +1351,7 @@ namespace BaskerNS fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(fp, "%%Generated by **Basker** \n"); fprintf(fp, "%%Starting Row %d Starting Col %d \n", - M.srow, M.scol); + M.srow, M.scol); fprintf(fp, "%ld %ld %ld \n", (long)M.nrow, 
(long)M.ncol, (long)M.nnz); Int bcol=M.scol; @@ -1721,7 +1738,7 @@ namespace BaskerNS { for(Int r = 0; r < LL_size(l); r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); Int brow = myL.srow; Int bcol = myL.scol; @@ -2354,7 +2371,7 @@ namespace BaskerNS ) { return (Int)(thread.league_rank()*thread.team_size()+ - thread.team_rank()); + thread.team_rank()); }//end t_get_kid @@ -2477,4 +2494,5 @@ namespace BaskerNS }//end namespace basker +#undef BASKER_TIMER #endif //end basker_util_hpp