Skip to content

Commit

Permalink
Merge branch 'develop' into v2024.xx.y-RC
Browse files Browse the repository at this point in the history
  • Loading branch information
rhornung67 committed Aug 9, 2024
2 parents 4a87047 + e1e7d0c commit 66cbd54
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 49 deletions.
38 changes: 30 additions & 8 deletions scripts/lc-builds/toss4_cray-mpich_amdclang.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,19 @@ echo
echo "To use fp64 HW atomics you must configure with these options when using gfx90a and hip >= 5.2"
echo " -DCMAKE_CXX_FLAGS=\"-munsafe-fp-atomics\""
echo
echo "To work around some issues where *_FUSED kernels crash add these options"
echo " -DCMAKE_CXX_FLAGS=\"-fgpu-rdc\""
echo " -DCMAKE_EXE_LINKER_FLAGS=\"-fgpu-rdc\""
echo
echo "To work around some issues where *_FUSED kernels perform poorly use this environment variable"
echo " env HSA_SCRATCH_SINGLE_LIMIT=4000000000"
echo
echo "To work around some issues where the build fails with a weird error about max or fmax add these options"
echo " -DCMAKE_CXX_FLAGS=\"--hip-version={hip_version:ex=6.1.2}\""
echo " -DCMAKE_EXE_LINKER_FLAGS=\"--hip-version={hip_version:ex=6.1.2}\""
echo



rm -rf build_${BUILD_SUFFIX} >/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}
Expand All @@ -67,18 +80,27 @@ module load cmake/3.23.1

# unload rocm to avoid configuration problems where the loaded rocm and COMP_VER
# are inconsistent, causing the rocprim from the module to be used unexpectedly
module unload rocm
module unload rocm rocmcc

if [[ "${COMP_VER}" == *-magic ]]; then
ROCM_PATH="/usr/tce/packages/rocmcc/rocmcc-${COMP_VER}"
MPI_ROCM_PATH="/usr/tce/packages/cray-mpich/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}"
else
ROCM_PATH="/opt/rocm-${COMP_VER}"
MPI_ROCM_PATH=/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}
fi

cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMPI_C_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpiamdclang" \
-DMPI_CXX_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpiamdclang++" \
-DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \
-DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \
-DHIP_PATH=/opt/rocm-${COMP_VER}/llvm/bin \
-DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang \
-DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang++ \
-DMPI_C_COMPILER="${MPI_ROCM_PATH}/bin/mpiamdclang" \
-DMPI_CXX_COMPILER="${MPI_ROCM_PATH}/bin/mpiamdclang++" \
-DCMAKE_PREFIX_PATH="${ROCM_PATH}/lib/cmake" \
-DHIP_PLATFORM=amd \
-DROCM_ROOT_DIR="${ROCM_PATH}" \
-DHIP_ROOT_DIR="${ROCM_PATH}/hip" \
-DHIP_PATH="${ROCM_PATH}/llvm/bin" \
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/amdclang" \
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/amdclang++" \
-DCMAKE_HIP_ARCHITECTURES="${COMP_ARCH}" \
-DGPU_TARGETS="${COMP_ARCH}" \
-DAMDGPU_TARGETS="${COMP_ARCH}" \
Expand Down
93 changes: 53 additions & 40 deletions src/common/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,64 +730,77 @@ void Executor::runWarmupKernels()
getCout() << "\n\nRun warmup kernels...\n";

//
// For kernels to be run, assemble a set of feature IDs
// Get warmup kernels to run from input
//
std::set<FeatureID> feature_ids;
for (size_t ik = 0; ik < kernels.size(); ++ik) {
KernelBase* kernel = kernels[ik];
std::set<KernelID> kernel_ids = run_params.getWarmupKernelIDsToRun();

if ( kernel_ids.empty() ) {

for (size_t fid = 0; fid < NumFeatures; ++fid) {
FeatureID tfid = static_cast<FeatureID>(fid);
if (kernel->usesFeature(tfid) ) {
feature_ids.insert( tfid );
//
// If no warmup kernels were given, choose a warmup kernel for each feature
//

//
// For kernels to be run, assemble a set of feature IDs
//
std::set<FeatureID> feature_ids;
for (size_t ik = 0; ik < kernels.size(); ++ik) {
KernelBase* kernel = kernels[ik];

for (size_t fid = 0; fid < NumFeatures; ++fid) {
FeatureID tfid = static_cast<FeatureID>(fid);
if (kernel->usesFeature(tfid) ) {
feature_ids.insert( tfid );
}
}
}

} // iterate over kernels

//
// Map feature IDs to set of warmup kernel IDs
//
std::set<KernelID> kernel_ids;
for ( auto fid = feature_ids.begin(); fid != feature_ids.end(); ++ fid ) {
} // iterate over kernels

switch (*fid) {
//
// Map feature IDs to set of warmup kernel IDs
//
for ( auto fid = feature_ids.begin(); fid != feature_ids.end(); ++ fid ) {

case Forall:
case Kernel:
case Launch:
kernel_ids.insert(Basic_DAXPY); break;
switch (*fid) {

case Sort:
kernel_ids.insert(Algorithm_SORT); break;

case Scan:
kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;
case Forall:
case Kernel:
case Launch:
kernel_ids.insert(Basic_DAXPY); break;

case Workgroup:
kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;
case Sort:
kernel_ids.insert(Algorithm_SORT); break;

case Reduction:
kernel_ids.insert(Basic_REDUCE3_INT); break;
case Scan:
kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;

case Atomic:
kernel_ids.insert(Basic_PI_ATOMIC); break;
case Workgroup:
kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;

case View:
break;
case Reduction:
kernel_ids.insert(Basic_REDUCE3_INT); break;

#ifdef RAJA_PERFSUITE_ENABLE_MPI
case MPI:
kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
#endif
case Atomic:
kernel_ids.insert(Basic_PI_ATOMIC); break;

default:
break;
case View:
break;

#ifdef RAJA_PERFSUITE_ENABLE_MPI
case MPI:
kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
#endif

default:
break;

}

}

}


//
// Run warmup kernels
//
Expand Down
110 changes: 109 additions & 1 deletion src/common/RunParams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ RunParams::RunParams(int argc, char** argv)
checkrun_reps(1),
reference_variant(),
reference_vid(NumVariants),
warmup_kernel_input(),
invalid_warmup_kernel_input(),
kernel_input(),
invalid_kernel_input(),
exclude_kernel_input(),
Expand Down Expand Up @@ -195,6 +197,15 @@ void RunParams::print(std::ostream& str) const
str << "\n hip MPI data space = " << getDataSpaceName(hipMPIDataSpace);
str << "\n kokkos MPI data space = " << getDataSpaceName(kokkosMPIDataSpace);

str << "\n warmup_kernel_input = ";
for (size_t j = 0; j < warmup_kernel_input.size(); ++j) {
str << "\n\t" << warmup_kernel_input[j];
}
str << "\n invalid_warmup_kernel_input = ";
for (size_t j = 0; j < invalid_warmup_kernel_input.size(); ++j) {
str << "\n\t" << invalid_warmup_kernel_input[j];
}

str << "\n kernel_input = ";
for (size_t j = 0; j < kernel_input.size(); ++j) {
str << "\n\t" << kernel_input[j];
Expand Down Expand Up @@ -789,6 +800,22 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
input_state = BadInput;
}

} else if ( opt == std::string("--warmup-kernels") ||
opt == std::string("-wk") ) {

bool done = false;
i++;
while ( i < argc && !done ) {
opt = std::string(argv[i]);
if ( opt.at(0) == '-' ) {
i--;
done = true;
} else {
warmup_kernel_input.push_back(opt);
++i;
}
}

} else if ( opt == std::string("--kernels") ||
opt == std::string("-k") ) {

Expand Down Expand Up @@ -1288,6 +1315,15 @@ void RunParams::printHelpMessage(std::ostream& str) const

str << "\t --disable-warmup (disable warmup kernels) [Default is run warmup kernels that are relevant to kernels selected to run]\n\n";

str << "\t --warmup-kernels, -wk <space-separated strings> [Default is run warmup kernels that are relevant to kernels selected to run]\n"
<< "\t (names of individual kernels and/or groups of kernels to warmup)\n"
<< "\t See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n"
<< "\t Kernel names are listed as <group name>_<kernel name>.\n";
str << "\t\t Examples...\n"
<< "\t\t --warmup-kernels Polybench (warmup all kernels in Polybench group)\n"
<< "\t\t -wk INIT3 MULADDSUB (warmup INIT3 and MULADDSUB kernels)\n"
<< "\t\t -wk INIT3 Apps (warmup INIT3 kernel and all kernels in Apps group)\n\n";

str << "\t --kernels, -k <space-separated strings> [Default is run all]\n"
<< "\t (names of individual kernels and/or groups of kernels to run)\n"
<< "\t See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n"
Expand Down Expand Up @@ -1942,6 +1978,77 @@ void RunParams::processKernelInput()
//
// ================================================================

run_warmup_kernels.clear();

if ( !warmup_kernel_input.empty() ) {

//
// Need to parse input to determine which warmup kernels to run
//

// Copy the warmup kernel name input into a list so that entries can be
// removed as group names and individual kernel names are processed below.
Slist warmup_kern_names(warmup_kernel_input.begin(), warmup_kernel_input.end());

//
// Search warmup_kern_names for matching group names.
// warmup_groups2run will contain names of groups to run.
//
Svector warmup_groups2run;
for (Slist::iterator it = warmup_kern_names.begin(); it != warmup_kern_names.end(); ++it)
{
for (size_t ig = 0; ig < NumGroups; ++ig) {
const std::string& group_name = getGroupName(static_cast<GroupID>(ig));
if ( group_name == *it ) {
warmup_groups2run.push_back(group_name);
}
}
}

//
// If group name(s) found in warmup_kern_names, assemble kernels in group(s)
// to run and remove those group name(s) from warmup_kern_names list.
//
for (size_t ig = 0; ig < warmup_groups2run.size(); ++ig) {
const std::string& gname(warmup_groups2run[ig]);

for (size_t kid = 0; kid < NumKernels; ++kid) {
KernelID tkid = static_cast<KernelID>(kid);
if ( getFullKernelName(tkid).find(gname) != std::string::npos &&
exclude_kernels.find(tkid) == exclude_kernels.end()) {
run_warmup_kernels.insert(tkid);
}
}

warmup_kern_names.remove(gname);
}

//
// Look for matching names of individual kernels in remaining warmup_kern_names.
//
for (Slist::iterator it = warmup_kern_names.begin(); it != warmup_kern_names.end(); ++it)
{
bool found_it = false;

for (size_t kid = 0; kid < NumKernels && !found_it; ++kid) {
KernelID tkid = static_cast<KernelID>(kid);
if ( getKernelName(tkid) == *it || getFullKernelName(tkid) == *it ) {
if (exclude_kernels.find(tkid) == exclude_kernels.end()) {
run_warmup_kernels.insert(tkid);
}
found_it = true;
}
}

// Assemble invalid input for output message.
if ( !found_it ) {
invalid_warmup_kernel_input.push_back(*it);
}

} // iterate over kernel name input

}

run_kernels.clear();

if ( kernel_input.empty() && feature_input.empty() ) {
Expand Down Expand Up @@ -2091,7 +2198,8 @@ void RunParams::processKernelInput()
// Set BadInput state based on invalid kernel input
//

if ( !(invalid_kernel_input.empty()) ||
if ( !(invalid_warmup_kernel_input.empty()) ||
!(invalid_kernel_input.empty()) ||
!(invalid_exclude_kernel_input.empty()) ) {
input_state = BadInput;
}
Expand Down
4 changes: 4 additions & 0 deletions src/common/RunParams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ class RunParams {

// Returns the stored disable_warmup flag.
// NOTE(review): presumably set by the '--disable-warmup' command line
// option -- confirm against the option parsing code.
bool getDisableWarmup() const { return disable_warmup; }

// Returns (by const reference) the set of kernel IDs requested as warmup
// kernels. The set is empty when no warmup kernel input was given; the
// Executor then chooses warmup kernels based on the features used by the
// kernels selected to run.
const std::set<KernelID>& getWarmupKernelIDsToRun() const { return run_warmup_kernels; }
// Returns (by const reference) run_kernels, the set of kernel IDs to run.
const std::set<KernelID>& getKernelIDsToRun() const { return run_kernels; }
// Returns (by const reference) run_variants, the set of variant IDs to run.
const std::set<VariantID>& getVariantIDsToRun() const { return run_variants; }
// Returns reference_vid, the ID of the reference variant. The constructor
// initializes it to NumVariants.
// NOTE(review): NumVariants presumably acts as a "no reference variant
// selected" sentinel -- confirm.
VariantID getReferenceVariantID() const { return reference_vid; }
Expand Down Expand Up @@ -360,6 +361,8 @@ class RunParams {
// Arrays to hold input strings for valid/invalid input. Helpful for
// debugging command line args.
//
std::vector<std::string> warmup_kernel_input;
std::vector<std::string> invalid_warmup_kernel_input;
std::vector<std::string> kernel_input;
std::vector<std::string> invalid_kernel_input;
std::vector<std::string> exclude_kernel_input;
Expand Down Expand Up @@ -390,6 +393,7 @@ class RunParams {

bool disable_warmup;

std::set<KernelID> run_warmup_kernels;
std::set<KernelID> run_kernels;
std::set<VariantID> run_variants;

Expand Down

0 comments on commit 66cbd54

Please sign in to comment.