Skip to content

Commit

Permalink
CUDA On Windows (MSVC) (#4309)
Browse files Browse the repository at this point in the history
Implement various workarounds for CUDA on Windows (with MSVC + NVCC).
- Redo some kernel launch macros because they do not work in function
templates.
- Work around the limitation that extended lambdas do not work
inside `if constexpr` blocks.
  - Inline some function templates to avoid function mismatch.
  - Remove some SFINAE constraints that do not work for CUDA on Windows.
- Replace `small` with `sml` because a Windows system header has
`#define small char`.
  - Implement isnan and isinf for device code.
  - Disable probinit.
- Skip DeviceGlobal and single-precision linear solver tests because of
certain limitations.

Add a CUDA on Windows CI job, with the CUDA toolkit installed by the
Jimver/cuda-toolkit action.
  • Loading branch information
WeiqunZhang authored Jan 24, 2025
1 parent 0f46a16 commit 4b34f9c
Show file tree
Hide file tree
Showing 33 changed files with 570 additions and 453 deletions.
31 changes: 30 additions & 1 deletion .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
if: needs.check_changes.outputs.has_non_docs_changes == 'true'
steps:
- uses: actions/checkout@v4
# If we add ccache back, don't forget to update cleanup-cache.yml
#- name: Set Up Cache
# uses: actions/cache@v3
# with:
Expand Down Expand Up @@ -66,6 +67,7 @@ jobs:
if: needs.check_changes.outputs.has_non_docs_changes == 'true'
steps:
- uses: actions/checkout@v4
# If we add ccache back, don't forget to update cleanup-cache.yml
#- name: Set Up Cache
# uses: actions/cache@v3
# with:
Expand Down Expand Up @@ -144,4 +146,31 @@ jobs:
set "PATH=%PATH%;D:\\a\amrex\amrex\installdir\bin"
cmake --build build --config Release --target test_install
# If we add ccache back, don't forget to update cleanup-cache.yml
tests_cuda:
name: CUDA on Windows
runs-on: windows-latest
needs: check_changes
if: needs.check_changes.outputs.has_non_docs_changes == 'true'
steps:
- uses: Jimver/[email protected]
id: cuda-toolkit
with:
cuda: '12.6.1'
use-github-cache: 'false'
- uses: actions/checkout@v4
- name: Compile
run: |
cmake -S . -B build `
-DCMAKE_VERBOSE_MAKEFILE=ON `
-DCMAKE_BUILD_TYPE=Release `
-DAMReX_GPU_BACKEND=CUDA `
-DAMReX_CUDA_ARCH="8.0" `
-DAMReX_ENABLE_TESTS=ON `
-DAMReX_EB=ON `
-DAMReX_FFT=ON `
-DAMReX_LINEAR_SOLVERS=ON `
-DAMReX_PARTICLES=ON `
-DAMReX_FORTRAN=OFF `
-DAMReX_MPI=OFF
cmake --build build --config Release -j 4
cmake --build build --config Release --target install
8 changes: 4 additions & 4 deletions Src/Base/AMReX_BoxList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ BoxList::BoxList(const Box& bx, const IntVect& tilesize)
ntiles *= nt[d];
}

IntVect small, big, ijk; // note that the initial values are all zero.
IntVect sml, big, ijk; // note that the initial values are all zero.
ijk[0] = -1;
for (int t=0; t<ntiles; ++t) {
for (int d=0; d<AMREX_SPACEDIM; d++) {
Expand All @@ -201,11 +201,11 @@ BoxList::BoxList(const Box& bx, const IntVect& tilesize)
}

for (int d=0; d<AMREX_SPACEDIM; d++) {
small[d] = ijk[d]*tilesize[d];
big[d] = std::min(small[d]+tilesize[d]-1, bx.length(d)-1);
sml[d] = ijk[d]*tilesize[d];
big[d] = std::min(sml[d]+tilesize[d]-1, bx.length(d)-1);
}

Box tbx(small, big, btype);
Box tbx(sml, big, btype);
tbx.shift(bx.smallEnd());
push_back(tbx);
}
Expand Down
259 changes: 87 additions & 172 deletions Src/Base/AMReX_FabArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -537,55 +537,117 @@ public:
FAB const* fabPtr (int K) const noexcept;

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
void prefetchToHost (const MFIter& mfi) const noexcept;
void prefetchToHost (const MFIter& mfi) const noexcept
{
#ifdef AMREX_USE_CUDA
this->fabPtr(mfi)->prefetchToHost();
#else
amrex::ignore_unused(mfi);
#endif
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
void prefetchToDevice (const MFIter& mfi) const noexcept;
void prefetchToDevice (const MFIter& mfi) const noexcept
{
#ifdef AMREX_USE_CUDA
this->fabPtr(mfi)->prefetchToDevice();
#else
amrex::ignore_unused(mfi);
#endif
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept
{
return fabPtr(mfi)->const_array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept;
//
Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept
{
return fabPtr(mfi)->array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept
{
return fabPtr(K)->const_array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type> array (int K) noexcept;
Array4<typename FabArray<FAB>::value_type> array (int K) noexcept
{
return fabPtr(K)->array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept
{
return fabPtr(mfi)->const_array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept;
Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept
{
return fabPtr(K)->const_array();
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept
{
return fabPtr(mfi)->const_array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept;
//
Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept
{
return fabPtr(mfi)->array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept
{
return fabPtr(K)->const_array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept;
Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept
{
return fabPtr(K)->array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept;
//
Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept
{
return fabPtr(mfi)->const_array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept;
Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept
{
return fabPtr(K)->const_array(start_comp);
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept;
MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept
{
build_arrays();
return m_arrays;
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept;
MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept
{
build_arrays();
return m_const_arrays;
}

template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept;
MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept
{
build_arrays();
return m_const_arrays;
}

//! Explicitly set the Kth FAB in the FabArray to point to elem.
void setFab (int boxno, std::unique_ptr<FAB> elem);
Expand Down Expand Up @@ -1531,153 +1593,6 @@ FabArray<FAB>::fabPtr (int K) const noexcept
return m_fabs_v[li];
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
void
FabArray<FAB>::prefetchToHost (const MFIter& mfi) const noexcept
{
#ifdef AMREX_USE_CUDA
this->fabPtr(mfi)->prefetchToHost();
#else
amrex::ignore_unused(mfi);
#endif
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
void
FabArray<FAB>::prefetchToDevice (const MFIter& mfi) const noexcept
{
#ifdef AMREX_USE_CUDA
this->fabPtr(mfi)->prefetchToDevice();
#else
amrex::ignore_unused(mfi);
#endif
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::array (const MFIter& mfi) const noexcept
{
return fabPtr(mfi)->const_array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type>
FabArray<FAB>::array (const MFIter& mfi) noexcept
{
return fabPtr(mfi)->array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::array (int K) const noexcept
{
return fabPtr(K)->const_array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type>
FabArray<FAB>::array (int K) noexcept
{
return fabPtr(K)->array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::const_array (const MFIter& mfi) const noexcept
{
return fabPtr(mfi)->const_array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::const_array (int K) const noexcept
{
return fabPtr(K)->const_array();
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::array (const MFIter& mfi, int start_comp) const noexcept
{
return fabPtr(mfi)->const_array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type>
FabArray<FAB>::array (const MFIter& mfi, int start_comp) noexcept
{
return fabPtr(mfi)->array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::array (int K, int start_comp) const noexcept
{
return fabPtr(K)->const_array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type>
FabArray<FAB>::array (int K, int start_comp) noexcept
{
return fabPtr(K)->array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::const_array (const MFIter& mfi, int start_comp) const noexcept
{
return fabPtr(mfi)->const_array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
Array4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::const_array (int K, int start_comp) const noexcept
{
return fabPtr(K)->const_array(start_comp);
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
MultiArray4<typename FabArray<FAB>::value_type>
FabArray<FAB>::arrays () noexcept
{
build_arrays();
return m_arrays;
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
MultiArray4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::arrays () const noexcept
{
build_arrays();
return m_const_arrays;
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
MultiArray4<typename FabArray<FAB>::value_type const>
FabArray<FAB>::const_arrays () const noexcept
{
build_arrays();
return m_const_arrays;
}

template <class FAB>
template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
void
Expand Down
12 changes: 6 additions & 6 deletions Src/Base/AMReX_FabArrayBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2337,7 +2337,7 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const
ntiles *= nt_in_fab[d];
}

IntVect small, big, ijk; // note that the initial values are all zero.
IntVect sml, big, ijk; // note that the initial values are all zero.
ijk[0] = -1;
for (int t = 0; t < ntiles; ++t) {
ta.indexMap.push_back(K);
Expand All @@ -2356,15 +2356,15 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const

for (int d=0; d<AMREX_SPACEDIM; d++) {
if (ijk[d] < nleft[d]) {
small[d] = ijk[d]*(tsize[d]+1);
big[d] = small[d] + tsize[d];
sml[d] = ijk[d]*(tsize[d]+1);
big[d] = sml[d] + tsize[d];
} else {
small[d] = ijk[d]*tsize[d] + nleft[d];
big[d] = small[d] + tsize[d] - 1;
sml[d] = ijk[d]*tsize[d] + nleft[d];
big[d] = sml[d] + tsize[d] - 1;
}
}

Box tbx(small, big, IndexType::TheCellType());
Box tbx(sml, big, IndexType::TheCellType());
tbx.shift(bx.smallEnd());

ta.tileArray.push_back(tbx);
Expand Down
Loading

0 comments on commit 4b34f9c

Please sign in to comment.