Skip to content

Commit

Permalink
merge upstream/next
Browse files Browse the repository at this point in the history
  • Loading branch information
Nadish Saini committed Aug 29, 2024
2 parents 8b79588 + f87f768 commit 74469ff
Show file tree
Hide file tree
Showing 71 changed files with 1,181 additions and 1,368 deletions.
1 change: 0 additions & 1 deletion 3rd_party/occa/src/occa/internal/modes/dpcpp/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ namespace occa

// Forwards to setDeviceArchCompilerFlags(), passing this device's
// `dpcppDevice` together with `compilerFlags`. The flags string is taken by
// non-const reference, so any options the helper adds are visible to the
// caller.
void device::setArchCompilerFlags(std::string& compilerFlags)
{
setDeviceArchCompilerFlags(dpcppDevice, compilerFlags);
}

void device::compileKernel(const std::string &hashDir,
Expand Down
102 changes: 0 additions & 102 deletions 3rd_party/occa/src/occa/internal/modes/dpcpp/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,108 +90,6 @@ namespace occa
dpcpp_properties["linker_flags"] = linker_flags;
}

// Returns the textual name of the architecture reported by `sycl_device`,
// spelled exactly like the corresponding
// sycl::ext::oneapi::experimental::architecture enumerator.
//
// The result is used verbatim as the value of `-fsycl-targets=` by
// setDeviceArchCompilerFlags(), so every string must match its enumerator
// name character for character.
//
// Returns an empty string when SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE is not
// defined (or is 0), or when the reported architecture has no entry in the
// table below.
std::string getDeviceArchString(const ::sycl::device& sycl_device) {
#if SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE
  namespace syclex = sycl::ext::oneapi::experimental;
  using arch_type = syclex::architecture;
  arch_type arch = sycl_device.get_info<syclex::info::device::architecture>();
  switch (arch) {
    // Intel GPUs
    case arch_type::intel_gpu_pvc : return "intel_gpu_pvc";
    case arch_type::intel_gpu_acm_g12 : return "intel_gpu_acm_g12";
    case arch_type::intel_gpu_acm_g11 : return "intel_gpu_acm_g11";
    case arch_type::intel_gpu_acm_g10 : return "intel_gpu_acm_g10";
    case arch_type::intel_gpu_adl_n : return "intel_gpu_adl_n";
    case arch_type::intel_gpu_adl_p : return "intel_gpu_adl_p";
    case arch_type::intel_gpu_adl_s : return "intel_gpu_adl_s";
    case arch_type::intel_gpu_rkl : return "intel_gpu_rkl";
    case arch_type::intel_gpu_ehl : return "intel_gpu_ehl";
    // Fixed: these two previously returned the truncated names
    // "intel_gpu_12_10" and "intel_gpu_12_0_".
    case arch_type::intel_gpu_12_10_0 : return "intel_gpu_12_10_0";
    case arch_type::intel_gpu_12_0_0 : return "intel_gpu_12_0_0";
    case arch_type::intel_gpu_11_0_0 : return "intel_gpu_11_0_0";
    case arch_type::intel_gpu_9_7_0 : return "intel_gpu_9_7_0";
    case arch_type::intel_gpu_9_6_0 : return "intel_gpu_9_6_0";
    case arch_type::intel_gpu_9_5_0 : return "intel_gpu_9_5_0";
    case arch_type::intel_gpu_9_4_0 : return "intel_gpu_9_4_0";
    case arch_type::intel_gpu_9_3_0 : return "intel_gpu_9_3_0";
    case arch_type::intel_gpu_9_2_9 : return "intel_gpu_9_2_9";
    case arch_type::intel_gpu_9_1_9 : return "intel_gpu_9_1_9";
    case arch_type::intel_gpu_9_0_9 : return "intel_gpu_9_0_9";
    case arch_type::intel_gpu_8_0_0 : return "intel_gpu_8_0_0";
    // NVIDIA GPUs
    case arch_type::nvidia_gpu_sm_50 : return "nvidia_gpu_sm_50";
    case arch_type::nvidia_gpu_sm_52 : return "nvidia_gpu_sm_52";
    case arch_type::nvidia_gpu_sm_53 : return "nvidia_gpu_sm_53";
    case arch_type::nvidia_gpu_sm_60 : return "nvidia_gpu_sm_60";
    case arch_type::nvidia_gpu_sm_61 : return "nvidia_gpu_sm_61";
    case arch_type::nvidia_gpu_sm_62 : return "nvidia_gpu_sm_62";
    case arch_type::nvidia_gpu_sm_70 : return "nvidia_gpu_sm_70";
    case arch_type::nvidia_gpu_sm_72 : return "nvidia_gpu_sm_72";
    case arch_type::nvidia_gpu_sm_75 : return "nvidia_gpu_sm_75";
    case arch_type::nvidia_gpu_sm_80 : return "nvidia_gpu_sm_80";
    case arch_type::nvidia_gpu_sm_86 : return "nvidia_gpu_sm_86";
    case arch_type::nvidia_gpu_sm_87 : return "nvidia_gpu_sm_87";
    case arch_type::nvidia_gpu_sm_89 : return "nvidia_gpu_sm_89";
    case arch_type::nvidia_gpu_sm_90 : return "nvidia_gpu_sm_90";
    // AMD GPUs
    // NOTE(review): the commented-out entries below are disabled in the
    // original; presumably those enumerators are not available in every
    // supported compiler release -- confirm before enabling.
    case arch_type::amd_gpu_gfx700 : return "amd_gpu_gfx700";
    case arch_type::amd_gpu_gfx701 : return "amd_gpu_gfx701";
    case arch_type::amd_gpu_gfx702 : return "amd_gpu_gfx702";
    case arch_type::amd_gpu_gfx801 : return "amd_gpu_gfx801";
    case arch_type::amd_gpu_gfx802 : return "amd_gpu_gfx802";
    case arch_type::amd_gpu_gfx803 : return "amd_gpu_gfx803";
    case arch_type::amd_gpu_gfx805 : return "amd_gpu_gfx805";
    case arch_type::amd_gpu_gfx810 : return "amd_gpu_gfx810";
    case arch_type::amd_gpu_gfx900 : return "amd_gpu_gfx900";
    case arch_type::amd_gpu_gfx902 : return "amd_gpu_gfx902";
    case arch_type::amd_gpu_gfx904 : return "amd_gpu_gfx904";
    case arch_type::amd_gpu_gfx906 : return "amd_gpu_gfx906";
    case arch_type::amd_gpu_gfx908 : return "amd_gpu_gfx908";
    // case arch_type::amd_gpu_gfx909 : return "amd_gpu_gfx909";
    case arch_type::amd_gpu_gfx90a : return "amd_gpu_gfx90a";
    // case arch_type::amd_gpu_gfx90c : return "amd_gpu_gfx90c";
    // case arch_type::amd_gpu_gfx940 : return "amd_gpu_gfx940";
    // case arch_type::amd_gpu_gfx941 : return "amd_gpu_gfx941";
    // case arch_type::amd_gpu_gfx942 : return "amd_gpu_gfx942";
    case arch_type::amd_gpu_gfx1010 : return "amd_gpu_gfx1010";
    case arch_type::amd_gpu_gfx1011 : return "amd_gpu_gfx1011";
    case arch_type::amd_gpu_gfx1012 : return "amd_gpu_gfx1012";
    case arch_type::amd_gpu_gfx1013 : return "amd_gpu_gfx1013";
    case arch_type::amd_gpu_gfx1030 : return "amd_gpu_gfx1030";
    case arch_type::amd_gpu_gfx1031 : return "amd_gpu_gfx1031";
    case arch_type::amd_gpu_gfx1032 : return "amd_gpu_gfx1032";
    // case arch_type::amd_gpu_gfx1033 : return "amd_gpu_gfx1033";
    case arch_type::amd_gpu_gfx1034 : return "amd_gpu_gfx1034";
    // case arch_type::amd_gpu_gfx1035 : return "amd_gpu_gfx1035";
    // case arch_type::amd_gpu_gfx1036 : return "amd_gpu_gfx1036";
    // case arch_type::amd_gpu_gfx1100 : return "amd_gpu_gfx1100";
    // case arch_type::amd_gpu_gfx1101 : return "amd_gpu_gfx1101";
    // case arch_type::amd_gpu_gfx1102 : return "amd_gpu_gfx1102";
    // case arch_type::amd_gpu_gfx1103 : return "amd_gpu_gfx1103";
    // case arch_type::amd_gpu_gfx1150 : return "amd_gpu_gfx1150";
    // case arch_type::amd_gpu_gfx1151 : return "amd_gpu_gfx1151";
    // case arch_type::amd_gpu_gfx1200 : return "amd_gpu_gfx1200";
    // case arch_type::amd_gpu_gfx1201 : return "amd_gpu_gfx1201";
    // Unknown or unlisted architecture: no target string.
    default: return "";
  }
#else
  // Device-architecture extension unavailable: nothing to report.
  return "";
#endif
}

// Extends `compiler_flags` in place with options derived from the
// architecture string of `sycl_device` (see getDeviceArchString()):
//   * " -fsycl-targets=<arch>" is appended when the flags do not already
//     contain "-fsycl-targets" and the architecture string is non-empty;
//   * the auto-large-GRF backend option is appended when the flags do not
//     contain "-ftarget-register-alloc-mode" and the architecture string
//     contains "pvc".
void setDeviceArchCompilerFlags(const ::sycl::device& sycl_device,
                                std::string& compiler_flags) {
  const std::string arch_name = getDeviceArchString(sycl_device);

  // Respect an explicit target selection supplied by the caller.
  const bool targets_given =
      compiler_flags.find("-fsycl-targets") != std::string::npos;
  if (!targets_given && !arch_name.empty()) {
    compiler_flags += " -fsycl-targets=" + arch_name;
  }

  // Respect an explicit register allocation mode supplied by the caller.
  const bool alloc_mode_given =
      compiler_flags.find("-ftarget-register-alloc-mode") != std::string::npos;
  const bool arch_is_pvc = arch_name.find("pvc") != std::string::npos;
  if (!alloc_mode_given && arch_is_pvc) {
    compiler_flags += " -Xsycl-target-backend '-options -ze-intel-enable-auto-large-GRF-mode'";
  }
}

occa::dpcpp::device& getDpcppDevice(modeDevice_t* device_)
{
occa::dpcpp::device* dpcppDevice = dynamic_cast<occa::dpcpp::device*>(device_);
Expand Down
2 changes: 0 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,6 @@ endforeach()

# cvode
if (ENABLE_CVODE)
message(FATAL_ERROR "CVODE is not currently supported!")

# workaround to resolve name conflict (used by OCCA and CVODE)
set(ENABLE_CUDA ${OCCA_CUDA_ENABLED})
set(ENABLE_HIP ${OCCA_HIP_ENABLED})
Expand Down
9 changes: 6 additions & 3 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
* [ADIOS2](https://adios2.readthedocs.io/) field file writer
* Additional output options (element filter and interpolation on uniform grid / different polynomial-order)
* Multi session nek-nek including multi-rate time stepping
* Combined CG for improved performance
* CHT nek-nek support
* nek-nek support for nrsqsub scripts
* Improved JIT compilation performance
* HIP support for BoomerAMG
* Intel GPU support
* Aero forces
* User friendly opSEM class
* opSEM class
* Mesh surface ops
* Linear implicit velocity source term
* Combined CG for improved performance
* Various bug fixes

## Good to know
Expand Down Expand Up @@ -62,7 +65,7 @@ This list provides an overview of the most significant changes in this release,

## Thanks to our Contributors

@kris-rowe, @MalachiTimothyPhillips, @yslan, @tcew
@kris-rowe, @yslan, @MalachiTimothyPhillips, @tcew

We are grateful to all who added new features, filed issues or helped resolve them,
asked and answered questions, and were part of inspiring discussions.
Expand Down
6 changes: 3 additions & 3 deletions cmake/bench.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ target_link_libraries(fdm-bin PRIVATE nekrs-lib)

set(BENCH_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/bench)
set(BENCH_SOURCES
${BENCH_SOURCE_DIR}/fdm/benchmarkFDM.cpp
${BENCH_SOURCE_DIR}/axHelm/benchmarkAx.cpp
${BENCH_SOURCE_DIR}/advsub/benchmarkAdvsub.cpp
${BENCH_SOURCE_DIR}/fdm/benchmark.cpp
${BENCH_SOURCE_DIR}/axHelm/benchmark.cpp
${BENCH_SOURCE_DIR}/advsub/benchmark.cpp
${BENCH_SOURCE_DIR}/core/kernelBenchmarker.cpp
)
4 changes: 2 additions & 2 deletions examples/conj_ht/ci.inc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
static int ciMode = 0;
double numCVODERHS = 0;

#define EPS 1e-3
#define EPS 5e-3

void ciSetup(MPI_Comm comm, setupAide &options)
{
Expand Down Expand Up @@ -52,7 +52,7 @@ void ciTestPointInterpolation(nrs_t *nrs, double time, int tstep)
return;
}

auto interp = pointInterpolation_t(nrs->cds->mesh[0]);
auto interp = pointInterpolation_t(nrs->cds->mesh[0], platform->comm.mpiComm);

std::vector<dfloat> x = {4, 4, 4};
std::vector<dfloat> y = {-0.25, 0.25, 1.0};
Expand Down
Loading

0 comments on commit 74469ff

Please sign in to comment.