diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 942920275..34c636131 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ ############################################################################### # Copyright (c) 2017-22, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. -# See the RAJAPerf/COPYRIGHT file for details. +# See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### @@ -11,97 +11,52 @@ # at Lawrence Livermore National Laboratory (LLNL). # # This entire pipeline is LLNL-specific -# ############################################################################# - -# We define the following GitLab pipeline variables: -# -# GIT_SUBMODULE_STRATEGY: -# Tells Gitlab to recursively update the submodules when cloning umpire -# -# ALLOC_NAME: -# On LLNL's ruby, this pipeline creates only one allocation shared among jobs -# in order to save time and resources. This allocation has to be uniquely named -# so that we are sure to retrieve it. # -# BUILD_ROOT: -# The path to the shared resources between all jobs. The BUILD_ROOT is unique to -# the pipeline, preventing any form of concurrency with other pipelines. This -# also means that the BUILD_ROOT directory will never be cleaned. +# Important note: This file is a copy of the template provided by +# llnl/radiuss-shared-ci. It should not require any change from the project to +# get started but could feature project-specific stages. # -# DEFAULT_TIME: -# Default time to let the Lassen jobs run will be 30 minutes. However, if it is -# a job that requires more time, it will be overwritten in the lassen template -# file. 
-# TODO: add a clean-up mechanism +# Instead, each project should provide: +# - .gitlab/subscribed-pipelines.yml +# - .gitlab/custom-jobs-and-variables.yml +# - .gitlab/${MACHINE}-build-and-test-extra.yml +############################################################################### +# We define the following GitLab pipeline variables: variables: +# Use the umdev service user to run CI. This prevents from running pipelines as +# an actual user. + LLNL_SERVICE_USER: umdev +# Use the service user workspace. Solves permission issues, stores everything +# at the same location whoever triggers a pipeline. + CUSTOM_CI_BUILDS_DIR: /usr/workspace/umdev/gitlab-runner +# Tells Gitlab to recursively update the submodules when cloning the project. GIT_SUBMODULE_STRATEGY: recursive - ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID} +# We build the projects in the CI clone directory. +# TODO: add a clean-up mechanism BUILD_ROOT: ${CI_PROJECT_DIR} - DEFAULT_TIME: 30 - MP_BRANCH: "develop" -# Normally, stages are blocking in Gitlab. However, using the keyword "needs" we -# can express dependencies between job that break the ordering of stages, in -# favor of a DAG. -# In practice r_*, l_* and b_* stages are independently run and start immediately. +# We organize the CI on Gitlab in sub-pipelines. Each sub-pipeline corresponds +# to a test phase on a given machine. +# High level stages stages: - - r_allocate_resources - - r_build_and_test - - r_release_resources - - l_build_and_test - - c_allocate_resources - - c_build_and_test - - c_release_resources - -# This is the rules that drives the activation of "advanced" jobs. All advanced -# jobs will share this through a template mechanism. -.advanced_pipeline: - rules: - - if: '$CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "develop" || $ALL_TARGETS == "ON"' #run only if ... - -# These are also templates (.name) that define project specific build commands. 
-# If an allocation exist with the name defined in this pipeline, the job will -# use it (slurm specific). -.build_toss_3_x86_64_ib_script: - script: - - echo ${ALLOC_NAME} - - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A) - - echo ${JOBID} - - srun $( [[ -n "${JOBID}" ]] && echo "--jobid=${JOBID}" ) -t ${DEFAULT_TIME} -N 1 scripts/gitlab/build_and_test.sh - artifacts: - reports: - junit: junit.xml - -.build_toss_4_x86_64_ib_corona_script: - script: - - srun -p pbatch -t 30 -N 1 scripts/gitlab/build_and_test.sh - -# Lassen and Butte use a different job scheduler (spectrum lsf) that does not -# allow pre-allocation the same way slurm does. -.build_blueos_3_ppc64le_ib_script: - script: - - lalloc 1 -W ${DEFAULT_TIME} scripts/gitlab/build_and_test.sh - artifacts: - reports: - junit: junit.xml - -.build_blueos_3_ppc64le_ib_ats_disabled_script: - script: - - lalloc 1 --atsdisable -W ${DEFAULT_TIME} scripts/gitlab/build_and_test.sh - artifacts: - reports: - junit: junit.xml - -.build_blueos_3_ppc64le_ib_p9_script: - extends: .build_blueos_3_ppc64le_ib_script - -# This is where jobs are included. 
+ - build-and-test + +# Template for jobs triggering build-and-test sub-pipelines: +.build-and-test: + stage: build-and-test + trigger: + include: + - local: '.gitlab/custom-jobs-and-variables.yml' + - project: 'radiuss/radiuss-shared-ci' + ref: v2022.09.0 + file: '${CI_MACHINE}-build-and-test.yml' + - local: '.gitlab/${CI_MACHINE}-build-and-test-extra.yml' + strategy: depend + forward: + pipeline_variables: true + +# pipelines subscribed by the project include: - - local: .gitlab/ruby-templates.yml - - local: .gitlab/ruby-jobs.yml - - local: .gitlab/lassen-templates.yml - - local: .gitlab/lassen-jobs.yml - - local: .gitlab/corona-templates.yml - - local: .gitlab/corona-jobs.yml + - local: .gitlab/subscribed-pipelines.yml diff --git a/.gitlab/corona-build-and-test-extra.yml b/.gitlab/corona-build-and-test-extra.yml new file mode 100644 index 000000000..1dd6b9bed --- /dev/null +++ b/.gitlab/corona-build-and-test-extra.yml @@ -0,0 +1,33 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################# + +######################## +# Overridden shared jobs +######################## +# We duplicate the shared jobs description and add necessary changes for RAJA. +# We keep ${PROJECT_<MACHINE>_VARIANTS} and ${PROJECT_<MACHINE>_DEPS} so that +# the comparison with the original job is easier. + +# No overridden jobs so far. + +############ +# Extra jobs +############ +# We do not recommend using ${PROJECT_<MACHINE>_VARIANTS} and +# ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully +# describe the spec here. + +# With GitLab CI, included files cannot be empty. 
+variables: + INCLUDED_FILE_CANNOT_BE_EMPTY: "True" + +# INFO: This job is activated in RAJA CI, but we don't use desul atomics here +#rocm_5_1_1_clang_13_0_0_desul_atomics: +# variables: +# SPEC: "+rocm~openmp +desul amdgpu_target=gfx906 %clang@13.0.0 ^blt@develop ^hip@5.1.1" +# extends: .build_and_test_on_corona diff --git a/.gitlab/corona-jobs.yml b/.gitlab/corona-jobs.yml deleted file mode 100644 index 4b9428f3a..000000000 --- a/.gitlab/corona-jobs.yml +++ /dev/null @@ -1,16 +0,0 @@ -############################################################################# -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. -# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################# - -hip_5.1.0_clang_13_0_0 (build and test on corona): - variables: - SPEC: "+rocm~openmp amdgpu_target=gfx906 %clang@13.0.0 ^blt@develop ^hip@5.1.0" - extends: .build_and_test_on_corona - -#hip_5.1.0_clang_13_0_0_desul_atomics (build and test on corona): -# variables: -# SPEC: "+rocm~openmp +desul amdgpu_target=gfx906 %clang@13.0.0 ^blt@develop ^hip@5.1.0" -# extends: .build_and_test_on_corona diff --git a/.gitlab/corona-templates.yml b/.gitlab/corona-templates.yml deleted file mode 100644 index 4e1a5cb74..000000000 --- a/.gitlab/corona-templates.yml +++ /dev/null @@ -1,33 +0,0 @@ -############################################################################# -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. -# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################# - -#### -# This is the share configuration of jobs for corona - -#### -# In pre-build phase, allocate a node for builds -.on_corona: - tags: - - shell - - corona - rules: - - if: '$ON_CORONA == "OFF"' #run except if ... 
- when: never - - if: '$CI_JOB_NAME =~ /release_resources/' - when: always - - when: on_success - -#### -# Generic corona build job, extending build script -.build_and_test_on_corona: - stage: c_build_and_test - extends: [.build_toss_4_x86_64_ib_corona_script, .on_corona] - needs: [] - -.build_and_test_on_corona_advanced: - extends: [.build_and_test_on_corona, .advanced_pipeline] - diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml new file mode 100644 index 000000000..a7c72805a --- /dev/null +++ b/.gitlab/custom-jobs-and-variables.yml @@ -0,0 +1,53 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################## + +# We define the following GitLab pipeline variables: +variables: + +# On LLNL's ruby, this pipeline creates only one allocation shared among jobs +# in order to save time and resources. This allocation has to be uniquely named +# so that we are sure to retrieve it and avoid collisions. 
+ ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID} + +# Ruby +# Arguments for top level allocation + RUBY_BUILD_AND_TEST_SHARED_ALLOC: "--exclusive --partition=pdebug --time=45 --nodes=1" +# Arguments for job level allocation + RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=30 --nodes=1" +# Project specific variants for ruby + PROJECT_RUBY_VARIANTS: "+openmp " +# Project specific deps for ruby + PROJECT_RUBY_DEPS: "" + +# Corona +# Arguments for top level allocation + CORONA_BUILD_AND_TEST_SHARED_ALLOC: "--time-limit=60m --nodes=1" +# Arguments for job level allocation + CORONA_BUILD_AND_TEST_JOB_ALLOC: "--time-limit=30m --nodes=1" +# Project specific variants for corona + PROJECT_CORONA_VARIANTS: "~openmp " +# Project specific deps for corona + PROJECT_CORONA_DEPS: "^blt@develop " + +# Lassen and Butte use a different job scheduler (spectrum lsf) that does not +# allow pre-allocation the same way slurm does. +# Arguments for job level allocation + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 30" +# Project specific variants for lassen + PROJECT_LASSEN_VARIANTS: "+openmp " +# Project specific deps for lassen + PROJECT_LASSEN_DEPS: "" + +# Configuration shared by build and test jobs specific to this project. +# Not all configuration can be shared. Here is where projects can fine tune the +# CI behavior. +# See Umpire for an example (export junit test reports). +.custom_build_and_test: + artifacts: + reports: + junit: junit.xml diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml new file mode 100644 index 000000000..aad9c6c2e --- /dev/null +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -0,0 +1,138 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################## + +######################## +# Overridden shared jobs +######################## +# We duplicate the shared jobs description and add necessary changes for RAJA. +# We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that +# the comparison with the original job is easier. + +# Overriding shared spec: Allow failures +ibm_clang_9_0_0: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %clang@ibm.9.0.0 ${PROJECT_LASSEN_DEPS}" + extends: .build_and_test_on_lassen + allow_failure: true + +# Overriding shared spec: Allow failures +ibm_clang_9_0_0_gcc_8_3_1: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %clang@ibm.9.0.0 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + extends: .build_and_test_on_lassen + allow_failure: true + +# Overriding shared spec: Extra flags +gcc_8_3_1: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %gcc@8.3.1 cxxflags=\"-finline-functions -finline-limit=20000\" cflags=\"-finline-functions -finline-limit=20000\" ${PROJECT_LASSEN_DEPS}" + extends: .build_and_test_on_lassen + +# Overriding shared spec: Longer allocation + Allow failures +pgi_20_4_gcc_8_3_1: + extends: .build_and_test_on_lassen + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %pgi@20.4 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" + allow_failure: true + +# Overriding shared spec: Longer allocation + Extra flags +xl_16_1_1_12: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags=\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036 ${PROJECT_LASSEN_DEPS}\"" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 50" + 
extends: .build_and_test_on_lassen + +# Overriding shared spec: Longer allocation + Extra flags +xl_16_1_1_12_gcc_8_3_1: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 50" + extends: .build_and_test_on_lassen + +# Overriding shared spec: Longer allocation + Allow failures +ibm_clang_9_0_0_gcc_8_3_1_cuda_10_1_168: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %clang@ibm.9.0.0 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@10.1.168 ${PROJECT_LASSEN_DEPS}" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" + extends: .build_and_test_on_lassen + allow_failure: true + +# Overriding shared spec: Extra flags + Longer allocation + Allow failure +xl_16_1_1_12_cuda_11_1_0: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags=\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cuda_arch=70 ^cuda@11.1.0 ${PROJECT_LASSEN_DEPS}" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" + allow_failure: true + extends: .build_and_test_on_lassen + +# Overriding shared spec: Extra flags + Longer allocation + Allow failure +xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags\"=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cuda_arch=70 ^cuda@11.1.0 ${PROJECT_LASSEN_DEPS}" + 
LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" + allow_failure: true + extends: .build_and_test_on_lassen + + +############ +# Extra jobs +############ +# We do not recommend using ${PROJECT__VARIANTS} and +# ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully +# describe the spec here. + +########## +# CPU ONLY +########## + +clang_14_0_5: + variables: + SPEC: " +openmp %clang@14.0.5" + extends: .build_and_test_on_lassen + +########## +# CUDA +########## + +clang_12_0_1_cuda_11_5_0: + variables: + SPEC: " +openmp +cuda cuda_arch=70 %clang@12.0.1 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.5.0" + extends: .build_and_test_on_lassen + +gcc_8_3_1_cuda_11_1_0: + variables: + SPEC: " +openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.1.0" + extends: .build_and_test_on_lassen + +gcc_8_3_1_cuda_11_5_0_ats_disabled: + extends: .build_and_test_on_lassen + variables: + SPEC: " +openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.5.0" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 --atsdisable -W 30" + +########## +# EXTRAS +########## + +clang_13_0_1_libcpp: + variables: + SPEC: " +openmp %clang@13.0.1+libcpp" + extends: .build_and_test_on_lassen + +clang_14_0_5_asan: + variables: + SPEC: " +openmp %clang@14.0.5 cxxflags=-fsanitize=address" + ASAN_OPTIONS: "detect_leaks=1" + extends: .build_and_test_on_lassen + +# Activated in RAJA, but we don't use desul atomics here +#gcc_8_3_1_cuda_10_1_168_desul_atomics: +# variables: +# SPEC: "+openmp +cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" +# extends: .build_and_test_on_lassen diff --git a/.gitlab/lassen-jobs.yml b/.gitlab/lassen-jobs.yml deleted file mode 100644 index 034de13eb..000000000 --- a/.gitlab/lassen-jobs.yml +++ /dev/null @@ -1,98 +0,0 @@ -############################################################################## -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. 
See the RAJA/LICENSE file for details. -# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################## - -########## -# CPU ONLY -########## - -#ibm_clang_10_0_1: -# variables: -# SPEC: "%clang@ibm.10.0.1" -# extends: .build_and_test_on_lassen - -clang_11_0_0: - variables: - SPEC: "+openmp %clang@11.0.0" - extends: .build_and_test_on_lassen - -#ibm_clang_9_gcc_8: -# variables: -# SPEC: "%clang@ibm.9.0.0 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" -# extends: .build_and_test_on_lassen - -gcc_8_3_1: - variables: - SPEC: "+openmp %gcc@8.3.1 cxxflags='-finline-functions -finline-limit=20000' cflags='-finline-functions -finline-limit=20000'" - extends: .build_and_test_on_lassen - -xl_16_1_1_12: - variables: - SPEC: "+openmp %xl@16.1.1.12 cxxflags='-qthreaded -std=c++14 -O2 -qstrict -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036'" - DEFAULT_TIME: 50 - extends: .build_and_test_on_lassen - -xl_16_1_1_12_gcc_8_3_1: - variables: - SPEC: "+openmp %xl@16.1.1.12 cxxflags='--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O2 -qstrict -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036' cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" - DEFAULT_TIME: 50 - extends: .build_and_test_on_lassen - -########## -# CUDA -########## - -#ibm_clang_9_cuda: -# variables: -# SPEC: "+cuda cuda_arch=70 %clang@ibm.9.0.0 ^cuda@10.1.168" -# extends: .build_and_test_on_lassen - -clang_11_cuda: - variables: - SPEC: "+openmp +cuda cuda_arch=70 %clang@11.0.0 ^cuda@10.1.168" - extends: .build_and_test_on_lassen - -gcc_8_3_1_cuda: - variables: - SPEC: "+openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" - extends: .build_and_test_on_lassen - -gcc_8_3_1_cuda_ats_disabled: - variables: - SPEC: "+openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" - extends: 
.build_and_test_on_lassen_ats_disabled - -xl_16_1_1_12_cuda: - variables: - SPEC: "+openmp +cuda %xl@16.1.1.12 cxxflags='-qthreaded -std=c++14 -O2 -qstrict -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036' cuda_arch=70 ^cuda@10.1.168 ^cmake@3.14.5" - DEFAULT_TIME: 60 - extends: .build_and_test_on_lassen - -xl_16_1_1_12_gcc_8_3_1_cuda_11: - variables: - SPEC: "+openmp +cuda %xl@16.1.1.12 cuda_arch=70 cxxflags'=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O2 -qstrict -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036' cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 ^cuda@11.0.2 ^cmake@3.14.5" - DEFAULT_TIME: 60 - extends: .build_and_test_on_lassen - -########## -# EXTRAS -########## - -clang_9_0_0_libcpp (build and test on lassen): - variables: - SPEC: "+openmp %clang@9.0.0+libcpp" - extends: .build_and_test_on_lassen - -clang_9_0_0_memleak (build and test on lassen): - variables: - SPEC: "+openmp %clang@9.0.0 cxxflags=-fsanitize=address" - ASAN_OPTIONS: "detect_leaks=1" - extends: .build_and_test_on_lassen - -#gcc_8_3_1_cuda_desul_atomics: -# variables: -# SPEC: "+cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" -# extends: .build_and_test_on_lassen diff --git a/.gitlab/lassen-templates.yml b/.gitlab/lassen-templates.yml deleted file mode 100644 index dbc340f22..000000000 --- a/.gitlab/lassen-templates.yml +++ /dev/null @@ -1,34 +0,0 @@ -############################################################################## -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. 
-# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################## - -#### -# Shared configuration of jobs for lassen -.on_lassen: - variables: - tags: - - shell - - lassen - rules: - - if: '$CI_COMMIT_BRANCH =~ /_lnone/ || $ON_LASSEN == "OFF"' #run except if ... - when: never - - when: on_success - -.build_and_test_on_lassen: - stage: l_build_and_test - extends: [.build_blueos_3_ppc64le_ib_p9_script, .on_lassen] - needs: [] - -.build_and_test_on_lassen_ats_disabled: - stage: l_build_and_test - extends: [.build_blueos_3_ppc64le_ib_ats_disabled_script, .on_lassen] - needs: [] - -# Note: .build_and_test_on_lassen_advanced inherits from -# .build_and_test_on_lassen and .advanced_pileline. -# In particular, the rules section will be merged. Careful when changing rules. -.build_and_test_on_lassen_advanced: - extends: [.build_and_test_on_lassen, .advanced_pipeline] diff --git a/.gitlab/ruby-build-and-test-extra.yml b/.gitlab/ruby-build-and-test-extra.yml new file mode 100644 index 000000000..68a6bf0d7 --- /dev/null +++ b/.gitlab/ruby-build-and-test-extra.yml @@ -0,0 +1,42 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################## + +######################## +# Overridden shared jobs +######################## +# We duplicate the shared jobs description and add necessary changes for RAJA. +# We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that +# the comparison with the original job is easier. 
+ +# Overriding shared config for longer run +gcc_8_1_0: + variables: + SPEC: " ${PROJECT_RUBY_VARIANTS} %gcc@8.1.0 ${PROJECT_RUBY_DEPS}" + RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" + extends: .build_and_test_on_ruby + +# Overriding shared spec: Allow failures +pgi_20_1_gcc_local_8_3_1: + variables: + SPEC: " ${PROJECT_RUBY_VARIANTS} %pgi@20.1 cxxflags\"=-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" cflags\"=-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" fflags=\"-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" ${PROJECT_RUBY_DEPS}" + extends: .build_and_test_on_ruby + allow_failure: true + + +############ +# Extra jobs +############ +# We do not recommend using ${PROJECT_<MACHINE>_VARIANTS} and +# ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully +# describe the spec here. + +icpc_19_1_0: + variables: + SPEC: " +openmp %intel@19.1.0" + RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" + extends: .build_and_test_on_ruby diff --git a/.gitlab/ruby-jobs.yml b/.gitlab/ruby-jobs.yml deleted file mode 100644 index cb4214c6a..000000000 --- a/.gitlab/ruby-jobs.yml +++ /dev/null @@ -1,53 +0,0 @@ -############################################################################## -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. 
-# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################## - -clang_10: - variables: - SPEC: "+openmp %clang@10.0.1" - extends: .build_and_test_on_ruby - -clang_9: - variables: - SPEC: "+openmp %clang@9.0.0" - extends: .build_and_test_on_ruby - -gcc_8_1_0: - variables: - SPEC: "+openmp %gcc@8.1.0" - DEFAULT_TIME: 60 - extends: .build_and_test_on_ruby - -#icpc_17_0_2: -# variables: -# SPEC: "%intel@17.0.2" -# DEFAULT_TIME: 40 -# extends: .build_and_test_on_ruby - -#icpc_18_0_2: -# variables: -# SPEC: " tests=none %intel@18.0.2" -# DEFAULT_TIME: 40 -# extends: .build_and_test_on_ruby - -icpc_19_1_0: - variables: - SPEC: "+openmp %intel@19.1.0" - DEFAULT_TIME: 40 - extends: .build_and_test_on_ruby - -# EXTRAS - -#gcc_4_9_3: -# variables: -# SPEC: "%gcc@4.9.3" -# DEFAULT_TIME: 60 -# extends: .build_and_test_on_ruby - -#clang_10_desul_atomics: -# variables: -# SPEC: "+openmp +desul %clang@10.0.1 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" -# extends: .build_and_test_on_ruby diff --git a/.gitlab/ruby-templates.yml b/.gitlab/ruby-templates.yml deleted file mode 100644 index b1314534b..000000000 --- a/.gitlab/ruby-templates.yml +++ /dev/null @@ -1,54 +0,0 @@ -############################################################################## -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. -# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################## - -#### -# This is the shared configuration of jobs for ruby - -#### -# In pre-build phase, allocate a node for builds -.on_ruby: - tags: - - shell - - ruby - rules: - - if: '$CI_COMMIT_BRANCH =~ /_qnone/ || $ON_RUBY == "OFF"' #run except if ... 
- when: never - - if: '$CI_JOB_NAME =~ /release_resources/' - when: always - - when: on_success - -#### -# In pre-build phase, allocate a node for builds -# NOTE: Not specifying 'salloc -c 56' should allocate the max number of CPU cores -allocate_resources (on ruby): - variables: - GIT_STRATEGY: none - extends: .on_ruby - stage: r_allocate_resources - script: - - salloc -N 1 -p pdebug -t 45 --no-shell --job-name=${ALLOC_NAME} - -#### -# In post-build phase, deallocate resources -# Note : make sure this is run even on build phase failure -release_resources (on ruby): - variables: - GIT_STRATEGY: none - extends: .on_ruby - stage: r_release_resources - script: - - export JOBID=$(squeue -h --name=${ALLOC_NAME} --format=%A) - - ([[ -n "${JOBID}" ]] && scancel ${JOBID}) - -#### -# Generic ruby build job, extending build script -.build_and_test_on_ruby: - extends: [.build_toss_3_x86_64_ib_script, .on_ruby] - stage: r_build_and_test - -.build_and_test_on_ruby_advanced: - extends: [.build_and_test_on_ruby, .advanced_pipeline] diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml new file mode 100644 index 000000000..ede2aebfe --- /dev/null +++ b/.gitlab/subscribed-pipelines.yml @@ -0,0 +1,25 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################## + +# Uncomment pipelines to subscribe to a shared pipeline. 
+ +# Trigger a build-and-test pipeline for ruby, corona and lassen +ruby-build-and-test: + variables: + CI_MACHINE: "ruby" + extends: [.build-and-test] + +corona-build-and-test: + variables: + CI_MACHINE: "corona" + extends: [.build-and-test] + +lassen-build-and-test: + variables: + CI_MACHINE: "lassen" + extends: [.build-and-test] diff --git a/.gitmodules b/.gitmodules index 13f05ecd3..0993afb86 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "tpl/RAJA"] path = tpl/RAJA url = https://github.com/LLNL/RAJA.git +[submodule "tpl/kokkos"] + path = tpl/kokkos + url = https://github.com/kokkos/kokkos diff --git a/.uberenv_config.json b/.uberenv_config.json index 6b9b7ca68..bae8bf1f6 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -3,10 +3,11 @@ "package_version" : "develop", "package_final_phase" : "hostconfig", "package_source_dir" : "../..", -"spack_url": "https://github.com/davidbeckingsale/spack", -"spack_branch": "feature/allow-untested-cuda-versions", -"spack_commit": "46b22d0f6227f6b12bab712bda5b916a53cfc67d", +"spack_url": "https://github.com/spack/spack.git", +"spack_branch": "v0.18.1", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", -"spack_packages_path": "scripts/spack_packages" +"spack_packages_path": "scripts/spack_packages", +"spack_concretizer": "clingo", +"spack_setup_clingo": false } diff --git a/CMakeLists.txt b/CMakeLists.txt index 813d1e9b0..8c7233236 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. 
# @@ -9,10 +9,15 @@ # C is required for googletest to find Threads project(RAJAPerfSuite LANGUAGES CXX C) -cmake_minimum_required(VERSION 3.14.5) +if (ENABLE_HIP) + cmake_minimum_required(VERSION 3.23) +else() + cmake_minimum_required(VERSION 3.20) +endif() option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable this, and all other variants, to run _only_ raw C loops." On) +option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) # # Note: the BLT build system is inheritted by RAJA and is initialized by RAJA @@ -22,8 +27,13 @@ if (PERFSUITE_ENABLE_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") endif() -set(CMAKE_CXX_STANDARD 14) -set(BLT_CXX_STD c++14) +if (ENABLE_KOKKOS) + set(CMAKE_CXX_STANDARD 17) + set(BLT_CXX_STD c++17) +else() + set(CMAKE_CXX_STANDARD 14) + set(BLT_CXX_STD c++14) +endif() include(blt/SetupBLT.cmake) @@ -85,8 +95,8 @@ if (ENABLE_OPENMP) add_definitions(-DRUN_OPENMP) endif () -set(RAJA_PERFSUITE_VERSION_MAJOR 0) -set(RAJA_PERFSUITE_VERSION_MINOR 11) +set(RAJA_PERFSUITE_VERSION_MAJOR 2022) +set(RAJA_PERFSUITE_VERSION_MINOR 10) set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0) set(RAJA_PERFSUITE_DEPENDS RAJA) @@ -100,7 +110,12 @@ endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) endif() -if (ENABLE_HIP) + +# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU +# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler. +# Separate RAJAPerf Suite and Kokkos handling of HIP compilers + +if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS)) message(STATUS "HIP version: ${hip_VERSION}") if("${hip_VERSION}" VERSION_LESS "3.5") message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. 
") @@ -113,8 +128,13 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE}) set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME}) if (ENABLE_CUDA) - set(CMAKE_CUDA_STANDARD 14) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") + if (ENABLE_KOKKOS) + set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --extended-lambda --expt-relaxed-constexpr") + else() + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --expt-extended-lambda --expt-relaxed-constexpr") + endif() set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}") list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER}) @@ -135,13 +155,46 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in include_directories($) -# Make sure RAJA flag propagate (we need to do some house cleaning to +# Make sure RAJA flags propagate (we need to do some tidying to # remove project-specific CMake variables that are no longer needed) set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) # # Each directory in the perf suite has its own CMakeLists.txt file. -# + +# ENABLE_KOKKOS is A RAJAPerf Suite Option +if(ENABLE_KOKKOS) + add_definitions(-DRUN_KOKKOS) + if(ENABLE_HIP) + set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the +Kokkos_ENABLE_HIP variable to ON") + endif() + + if(ENABLE_TARGET_OPENMP) + set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") + if(NOT CMAKE_BUILD_TYPE MATCHES Debug) + if(NOT EXPERIMENTAL_BUILD) + message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. 
If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") + endif() + endif() + endif() + +# ENABLE_CUDA IS A RAJA PERFSUITE OPTION + if(ENABLE_CUDA) + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring") + set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring") + enable_language(CUDA) + endif() + if(ENABLE_OPENMP) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Docstring") + endif() + + add_subdirectory(tpl/kokkos) + get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES) + include_directories(${KOKKOS_INCLUDE_DIRS}) + list(APPEND RAJA_PERFSUITE_DEPENDS kokkos) +endif() + add_subdirectory(src) if (RAJA_PERFSUITE_ENABLE_TESTS) diff --git a/Dockerfile b/Dockerfile index 037f59b60..f24f5d0ef 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ ############################################################################## -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### @@ -41,8 +41,9 @@ FROM ghcr.io/rse-ops/clang-ubuntu-20.04:llvm-11.0.0 AS clang11 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN . /opt/spack/share/spack/setup-env.sh && spack load llvm && \ - cmake -DCMAKE_CXX_COMPILER=clang++ -DENABLE_OPENMP=On .. && \ +RUN . /opt/spack/share/spack/setup-env.sh && \ + export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \ + cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On .. && \ make -j 6 &&\ ctest -T test --output-on-failure @@ -50,7 +51,9 @@ FROM ghcr.io/rse-ops/clang-ubuntu-20.04:llvm-11.0.0 AS clang11-debug ENV GTEST_COLOR=1 COPY . 
/home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug .. && \ +RUN . /opt/spack/share/spack/setup-env.sh && \ + export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \ + cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On .. && \ make -j 6 &&\ ctest -T test --output-on-failure @@ -58,7 +61,8 @@ FROM ghcr.io/rse-ops/clang-ubuntu-22.04:llvm-13.0.0 AS clang13 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN . /opt/spack/share/spack/setup-env.sh && spack load llvm && \ +RUN . /opt/spack/share/spack/setup-env.sh && \ + export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \ cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On .. && \ make -j 6 &&\ ctest -T test --output-on-failure @@ -87,16 +91,16 @@ RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \ cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \ make -j 4 -FROM ghcr.io/rse-ops/hip-ubuntu-20.04:hip-4.3.1 AS hip +FROM ghcr.io/rse-ops/hip-ubuntu-20.04:hip-5.1.3 AS hip ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN . /opt/spack/share/spack/setup-env.sh && spack load hip llvm-amdgpu && \ - cmake -DCMAKE_CXX_COMPILER=amdclang++ -DRAJA_ENABLE_EXTERNAL_ROCPRIM=Off -DHIP_PATH=/opt -DENABLE_HIP=On -DENABLE_CUDA=Off -DENABLE_OPENMP=Off -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off -DBLT_EXPORT_THIRDPARTY=On .. && \ + cmake -DCMAKE_CXX_COMPILER=clang++ -DHIP_PATH=/opt -DENABLE_HIP=On -DENABLE_CUDA=Off -DENABLE_OPENMP=Off -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off -DBLT_EXPORT_THIRDPARTY=On .. && \ make -j 6 -FROM ghcr.io/rse-ops/intel-ubuntu-22.04:intel-2022.0.1 AS sycl +FROM ghcr.io/rse-ops/intel-ubuntu-22.04:intel-2022.1.0 AS sycl ENV GTEST_COLOR=1 COPY . 
/home/raja/workspace WORKDIR /home/raja/workspace/build diff --git a/LICENSE b/LICENSE index 8e4df6528..039a20b01 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC. +Copyright (c) 2017-2023, Lawrence Livermore National Security, LLC. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index a85ad0f5f..9049cb1ac 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ [comment]: # (#################################################################) -[comment]: # (Copyright 2017-2022, Lawrence Livermore National Security, LLC) +[comment]: # (Copyright 2017-23, Lawrence Livermore National Security, LLC) [comment]: # (and RAJA Performance Suite project contributors.) [comment]: # (See the RAJAPerf/LICENSE file for details.) [comment]: # [comment]: # (# SPDX-License-Identifier: BSD-3-Clause) [comment]: # (#################################################################) - RAJA Performance Suite ====================== @@ -480,7 +479,7 @@ Here is what a header file for the FOO kernel object should look like: ```cpp //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/RELEASE b/RELEASE index 9096758f6..4b8dcac50 100644 --- a/RELEASE +++ b/RELEASE @@ -1,8 +1,8 @@ ******************************************************************************* -RAJA Performance Suite: ................................, version 0.11.0 +RAJA Performance Suite -Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC. +Copyright (c) 2017-23, Lawrence Livermore National Security, LLC. Produced at the Lawrence Livermore National Laboratory. All rights reserved. 
See details in the RAJAPerf/LICENSE file. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cfd56b94e..484869eba 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,11 +1,5 @@ -variables: - DO_BUILD: 'yes' - DO_TEST: 'yes' - DO_INSTALL: 'yes' - COMPILER: 'g++' - jobs: -#- job: Windows #temporarily commenting out until cmake/azure version issue resolved +#- job: Windows #commented out since we really don't need this # strategy: # matrix: # shared: @@ -32,7 +26,6 @@ jobs: # inputs: # script: 'ctest.exe -T test -C Release' # workingDirectory: 'build' -# condition: eq( variables['Agent.OS'], 'Windows_NT') # - task: PublishTestResults@2 # inputs: # testResultsFormat: 'cTest' @@ -63,8 +56,8 @@ jobs: ## docker_target: nvcc11-debug hip: docker_target: hip -# sycl: -# docker_target: sycl +## sycl: +## docker_target: sycl pool: vmImage: 'ubuntu-latest' variables: @@ -79,6 +72,10 @@ jobs: command: build dockerFile: 'Dockerfile' arguments: '--target $(docker_target)' + - task: PublishTestResults@2 + inputs: + testResultsFormat: 'cTest' + testResultsFiles: '**/Test.xml' - job: Mac pool: vmImage: 'macOS-latest' @@ -99,6 +96,9 @@ jobs: condition: eq( variables['Agent.OS'], 'Darwin') - script: | cd build - ./bin/raja-perf.exe --checkrun 5 -sp - displayName: 'Run Perf Suite' - condition: eq( variables['Agent.OS'], 'Darwin') + ctest -T test --output-on-failure + displayName: 'OSX Test' + - task: PublishTestResults@2 + inputs: + testResultsFormat: 'cTest' + testResultsFiles: '**/Test.xml' diff --git a/blt b/blt index 296bf64e6..8c229991e 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit 296bf64e64edfcfcce6a53e3b396d6529e76b986 +Subproject commit 8c229991e65e7a9603c621b47cb3ba158bb7468c diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 324f964ad..f05571e8d 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -1,8 +1,14 @@ #!/usr/bin/env bash +# Initialize modules for users not using 
bash as a default shell +if test -e /usr/share/lmod/lmod/init/bash +then + . /usr/share/lmod/lmod/init/bash +fi + ############################################################################### -# Copyright (c) 2016-21, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJAPerf/LICENSE file for details. +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC and RAJA +# project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### @@ -20,12 +26,30 @@ hostconfig=${HOST_CONFIG:-""} spec=${SPEC:-""} job_unique_id=${CI_JOB_ID:-""} raja_version=${UPDATE_RAJA:-""} - sys_type=${SYS_TYPE:-""} -py_env_path=${PYTHON_ENVIRONMENT_PATH:-""} + +prefix="" + +if [[ -d /dev/shm ]] +then + prefix="/dev/shm/${hostname}" + if [[ -z ${job_unique_id} ]]; then + job_unique_id=manual_job_$(date +%s) + while [[ -d ${prefix}-${job_unique_id} ]] ; do + sleep 1 + job_unique_id=manual_job_$(date +%s) + done + fi + + prefix="${prefix}-${job_unique_id}" + mkdir -p ${prefix} +fi # Dependencies date +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo "~~~~~ Build and test started" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" if [[ "${option}" != "--build-only" && "${option}" != "--test-only" ]] then echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" @@ -42,25 +66,25 @@ then if [[ -d /dev/shm ]] then - prefix="/dev/shm/${hostname}" - if [[ -z ${job_unique_id} ]]; then - job_unique_id=manual_job_$(date +%s) - while [[ -d ${prefix}/${job_unique_id} ]] ; do - sleep 1 - job_unique_id=manual_job_$(date +%s) - done - fi - - prefix="${prefix}/${job_unique_id}" - mkdir -p ${prefix} prefix_opt="--prefix=${prefix}" + + # We force Spack to put all generated files (cache and configuration of + # all sorts) in a unique location so that there can be no collision + # with existing or concurrent Spack. 
+ spack_user_cache="${prefix}/spack-user-cache" + export SPACK_DISABLE_LOCAL_CONFIG="" + export SPACK_USER_CACHE_PATH="${spack_user_cache}" + mkdir -p ${spack_user_cache} fi - python3 tpl/RAJA/scripts/uberenv/uberenv.py --project-json=".uberenv_config.json" --spec="${spec}" ${prefix_opt} + ./tpl/RAJA/scripts/uberenv/uberenv.py --project-json=".uberenv_config.json" --spec="${spec}" ${prefix_opt} - mv ${project_dir}/tpl/RAJA/hc-*.cmake ${project_dir}/. + mv ${project_dir}/tpl/RAJA/*.cmake ${project_dir}/. fi + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + echo "~~~~~ Dependencies Built" + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" date # Host config file @@ -68,18 +92,18 @@ if [[ -z ${hostconfig} ]] then # If no host config file was provided, we assume it was generated. # This means we are looking of a unique one in project dir. - hostconfigs=( $( ls "${project_dir}/"hc-*.cmake ) ) + hostconfigs=( $( ls "${project_dir}/"*.cmake ) ) if [[ ${#hostconfigs[@]} == 1 ]] then hostconfig_path=${hostconfigs[0]} echo "Found host config file: ${hostconfig_path}" elif [[ ${#hostconfigs[@]} == 0 ]] then - echo "No result for: ${project_dir}/hc-*.cmake" + echo "No result for: ${project_dir}/*.cmake" echo "Spack generated host-config not found." 
exit 1 else - echo "More than one result for: ${project_dir}/hc-*.cmake" + echo "More than one result for: ${project_dir}/*.cmake" echo "${hostconfigs[@]}" echo "Please specify one with HOST_CONFIG variable" exit 1 @@ -89,14 +113,25 @@ else hostconfig_path="${project_dir}/host-configs/${hostconfig}" fi +hostconfig=$(basename ${hostconfig_path}) + # Build Directory if [[ -z ${build_root} ]] then - build_root=$(pwd) + if [[ -d /dev/shm ]] + then + build_root="${prefix}" + else + build_root="$(pwd)" + fi +else + build_root="${build_root}" fi build_dir="${build_root}/build_${hostconfig//.cmake/}" +cmake_exe=`grep 'CMake executable' ${hostconfig_path} | cut -d ':' -f 2 | xargs` + # Build if [[ "${option}" != "--deps-only" && "${option}" != "--test-only" ]] then @@ -108,11 +143,7 @@ then echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" echo "" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo "~~~~ ENV ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - echo "~~~~~ Building RAJA PerfSuite" + echo "~~~~~ Building RAJA Perf Suite" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" # Map CPU core allocations @@ -121,7 +152,7 @@ then # If using Multi-project, set up the submodule if [[ -n ${raja_version} ]] then - cd tpl/RAJA + cd tpl/RAJA echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" echo "~~~~ Updating RAJA Submodule to develop ~~~" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" @@ -146,21 +177,18 @@ then then module unload rocm fi - - cmake \ + $cmake_exe \ -C ${hostconfig_path} \ ${project_dir} - if echo ${spec} | grep -q "intel" ; then - cmake --build . -j 16 - echo "~~~~~~~~~ Build Command: ~~~~~~~~~~~~~~~~~~~~~" - echo "cmake --build . -j 16" - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - else - cmake --build . -j ${core_counts[$truehostname]} - echo "~~~~~~~~~ Build Command: ~~~~~~~~~~~~~~~~~~~~~" - echo "cmake --build . 
-j ${core_counts[$truehostname]}" - echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + if ! $cmake_exe --build . -j ${core_counts[$truehostname]} + then + echo "ERROR: compilation failed, building with verbose output..." + $cmake_exe --build . --verbose -j 1 fi + + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + echo "~~~~~ RAJA Perf Suite Built" + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" date fi @@ -179,7 +207,7 @@ if grep -q -i "ENABLE_TESTS.*ON" ${hostconfig_path} then # - # Maintaining separate, but identical release and debug sections + # Maintaining separate, but identical release and debug sections # in case we want to make them disctinct in the future. # @@ -233,3 +261,7 @@ echo "~~~~~ CLEAN UP" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" make clean +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo "~~~~~ Build and test completed" +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +date diff --git a/scripts/install_llvm.sh b/scripts/install_llvm.sh index 60bfccd39..f1a16dcfa 100755 --- a/scripts/install_llvm.sh +++ b/scripts/install_llvm.sh @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # diff --git a/scripts/lc-builds/blueos_clang.sh b/scripts/lc-builds/blueos_clang.sh index 2329cca3c..a6fc06451 100755 --- a/scripts/lc-builds/blueos_clang.sh +++ b/scripts/lc-builds/blueos_clang.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. 
# # SPDX-License-Identifier: (BSD-3-Clause) @@ -31,7 +31,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/blueos_clang_omptarget.sh b/scripts/lc-builds/blueos_clang_omptarget.sh index e557c2dac..2f7fdf5e9 100755 --- a/scripts/lc-builds/blueos_clang_omptarget.sh +++ b/scripts/lc-builds/blueos_clang_omptarget.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -31,7 +31,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -40,8 +40,9 @@ cmake \ -C ${RAJA_HOSTCONFIG} \ -DENABLE_OPENMP=On \ -DENABLE_CUDA=Off \ - -DENABLE_TARGET_OPENMP=On \ - -DOpenMP_CXX_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DRAJA_ENABLE_TARGET_OPENMP=On \ + -DBLT_OPENMP_COMPILE_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DBLT_OPENMP_LINK_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. diff --git a/scripts/lc-builds/blueos_gcc.sh b/scripts/lc-builds/blueos_gcc.sh index b3ecbeb70..b51ad749a 100755 --- a/scripts/lc-builds/blueos_gcc.sh +++ b/scripts/lc-builds/blueos_gcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. 
See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/blueos_nvcc_clang.sh b/scripts/lc-builds/blueos_nvcc_clang.sh index 105938283..9801459b9 100755 --- a/scripts/lc-builds/blueos_nvcc_clang.sh +++ b/scripts/lc-builds/blueos_nvcc_clang.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -11,11 +11,11 @@ if [[ $# -ne 3 ]]; then echo echo "You must pass 3 arguments to the script (in this order): " echo " 1) compiler version number for nvcc" - echo " 2) CUDA compute architecture" + echo " 2) CUDA compute architecture (number only, not 'sm_70' for example)" echo " 3) compiler version number for clang. " echo echo "For example: " - echo " blueos_nvcc_clang.sh 10.2.89 sm_70 10.0.1" + echo " blueos_nvcc_clang.sh 10.2.89 70 10.0.1" exit fi @@ -36,7 +36,7 @@ echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -47,7 +47,7 @@ cmake \ -DENABLE_CUDA=On \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \ -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \ - -DCUDA_ARCH=${COMP_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. 
diff --git a/scripts/lc-builds/blueos_nvcc_gcc.sh b/scripts/lc-builds/blueos_nvcc_gcc.sh index 46f64ee17..200e86f9b 100755 --- a/scripts/lc-builds/blueos_nvcc_gcc.sh +++ b/scripts/lc-builds/blueos_nvcc_gcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -11,11 +11,11 @@ if [[ $# -ne 3 ]]; then echo echo "You must pass 3 arguments to the script (in this order): " echo " 1) compiler version number for nvcc" - echo " 2) CUDA compute architecture" - echo " 3) compiler version number for gcc. " + echo " 2) CUDA compute architecture (number only, not 'sm_70' for example)" + echo " 3) compiler version number for gcc" echo echo "For example: " - echo " blueos_nvcc_gcc.sh 10.2.89 sm_70 8.3.1" + echo " blueos_nvcc_gcc.sh 10.2.89 70 8.3.1" exit fi @@ -36,7 +36,7 @@ echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -47,7 +47,7 @@ cmake \ -DENABLE_CUDA=On \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \ -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \ - -DCUDA_ARCH=${COMP_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. 
diff --git a/scripts/lc-builds/blueos_nvcc_xl.sh b/scripts/lc-builds/blueos_nvcc_xl.sh index 950505cfc..9f2489694 100755 --- a/scripts/lc-builds/blueos_nvcc_xl.sh +++ b/scripts/lc-builds/blueos_nvcc_xl.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -11,11 +11,11 @@ if [[ $# -ne 3 ]]; then echo echo "You must pass 3 arguments to the script (in this order): " echo " 1) compiler version number for nvcc" - echo " 2) CUDA compute architecture" - echo " 3) compiler version number for xl. " + echo " 2) CUDA compute architecture (number only, not 'sm_70' for example)" + echo " 3) compiler version number for xl" echo echo "For example: " - echo " blueos_nvcc_xl.sh 11.1.1 sm_70 2021.03.31" + echo " blueos_nvcc_xl.sh 11.1.1 70 2021.03.31" exit fi @@ -36,7 +36,7 @@ echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -47,7 +47,7 @@ cmake \ -DENABLE_CUDA=On \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \ -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \ - -DCUDA_ARCH=${COMP_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. 
diff --git a/scripts/lc-builds/blueos_pgi.sh b/scripts/lc-builds/blueos_pgi.sh index d6c915fb9..c715d1c25 100755 --- a/scripts/lc-builds/blueos_pgi.sh +++ b/scripts/lc-builds/blueos_pgi.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/blueos_spectrum_nvcc_clang.sh b/scripts/lc-builds/blueos_spectrum_nvcc_clang.sh index 83bcb2903..631f8ef5c 100755 --- a/scripts/lc-builds/blueos_spectrum_nvcc_clang.sh +++ b/scripts/lc-builds/blueos_spectrum_nvcc_clang.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -11,12 +11,12 @@ if [[ $# -lt 4 ]]; then echo echo "You must pass 4 arguments to the script (in this order): " echo " 1) compiler version number for spectrum mpi" echo " 2) compiler version number for nvcc" - echo " 3) CUDA compute architecture" + echo " 3) CUDA compute architecture (number only, not 'sm_70' for example)" echo " 4) compiler version number for clang. 
" echo echo "For example: " - echo " blueos_nvcc_clang.sh rolling-release 10.2.89 sm_70 10.0.1" + echo " blueos_spectrum_nvcc_clang.sh rolling-release 10.2.89 70 10.0.1" exit fi @@ -38,7 +38,7 @@ echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -51,7 +51,7 @@ cmake \ -DENABLE_CUDA=On \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \ -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \ - -DCUDA_ARCH=${COMP_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. diff --git a/scripts/lc-builds/blueos_xl.sh b/scripts/lc-builds/blueos_xl.sh index 8630e419d..5d30ab1ea 100755 --- a/scripts/lc-builds/blueos_xl.sh +++ b/scripts/lc-builds/blueos_xl.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/blueos_xl_omptarget.sh b/scripts/lc-builds/blueos_xl_omptarget.sh index 9d18d4622..5f972f0dc 100755 --- a/scripts/lc-builds/blueos_xl_omptarget.sh +++ b/scripts/lc-builds/blueos_xl_omptarget.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. 
See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -10,14 +10,14 @@ if [ "$1" == "" ]; then echo echo "You must pass a compiler version number to script. For example," - echo " blueos_xl_omptarget.sh 2021.03.31" + echo " blueos_xl_omptarget.sh 2022.08.19" exit fi COMP_VER=$1 shift 1 -BUILD_SUFFIX=lc_blueos-xl-${COMP_VER} +BUILD_SUFFIX=lc_blueos-xl_omptarget-${COMP_VER} RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/blueos/xl_X.cmake echo @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -37,8 +37,9 @@ cmake \ -DBLT_CXX_STD=c++14 \ -C ${RAJA_HOSTCONFIG} \ -DENABLE_OPENMP=On \ - -DENABLE_TARGET_OPENMP=On \ - -DOpenMP_CXX_FLAGS="-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi" \ + -DRAJA_ENABLE_TARGET_OPENMP=On \ + -DBLT_OPENMP_COMPILE_FLAGS="-qoffload;-qsmp=omp;-qalias=noansi" \ + -DBLT_OPENMP_LINK_FLAGS="-qoffload;-qsmp=omp;-qalias=noansi" \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. diff --git a/scripts/lc-builds/toss3_clang.sh b/scripts/lc-builds/toss3_clang.sh index e3b51716d..7406363bc 100755 --- a/scripts/lc-builds/toss3_clang.sh +++ b/scripts/lc-builds/toss3_clang.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. 
# # SPDX-License-Identifier: (BSD-3-Clause) @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/toss3_gcc.sh b/scripts/lc-builds/toss3_gcc.sh index 031b01a7b..4e7bf6bc1 100755 --- a/scripts/lc-builds/toss3_gcc.sh +++ b/scripts/lc-builds/toss3_gcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/toss3_hipcc.sh b/scripts/lc-builds/toss3_hipcc.sh index 464c8390f..b5d9b2760 100755 --- a/scripts/lc-builds/toss3_hipcc.sh +++ b/scripts/lc-builds/toss3_hipcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. 
# # SPDX-License-Identifier: (BSD-3-Clause) @@ -14,7 +14,7 @@ if [[ $# -ne 2 ]]; then echo " 2) HIP compute architecture" echo echo "For example: " - echo " toss3_hipcc.sh 4.1.0 gfx906" + echo " toss3_hipcc.sh 5.1.0 gfx906" exit fi @@ -22,6 +22,23 @@ COMP_VER=$1 COMP_ARCH=$2 shift 2 +HIP_CLANG_FLAGS="--offload-arch=${COMP_ARCH}" +HOSTCONFIG="hip_3_X" + +if [[ ${COMP_VER} == 4.5.* ]] +then + HIP_CLANG_FLAGS="${HIP_CLANG_FLAGS} -mllvm -amdgpu-fixed-function-abi=1" + HOSTCONFIG="hip_4_5_link_X" +elif [[ ${COMP_VER} == 4.* ]] +then + HOSTCONFIG="hip_4_link_X" +elif [[ ${COMP_VER} == 3.* ]] +then + HOSTCONFIG="hip_3_X" +else + echo "Unknown hip version, using ${HOSTCONFIG} host-config" +fi + BUILD_SUFFIX=lc_toss3-hipcc-${COMP_VER}-${COMP_ARCH} RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/toss3/hip_link_X.cmake @@ -35,7 +52,7 @@ rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.23.1 cmake \ -DCMAKE_BUILD_TYPE=Release \ @@ -44,7 +61,7 @@ cmake \ -DHIP_CLANG_PATH=/opt/rocm-${COMP_VER}/llvm/bin \ -DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/clang \ -DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/clang++ \ - -DHIP_HIPCC_FLAGS=--offload-arch=${COMP_ARCH} \ + -DHIP_CLANG_FLAGS="${HIP_CLANG_FLAGS}" \ -DBLT_CXX_STD=c++14 \ -C ${RAJA_HOSTCONFIG} \ -DENABLE_HIP=ON \ diff --git a/scripts/lc-builds/toss3_icpc.sh b/scripts/lc-builds/toss3_icpc.sh index 9c941742f..a8b7de2b9 100755 --- a/scripts/lc-builds/toss3_icpc.sh +++ b/scripts/lc-builds/toss3_icpc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. 
# # SPDX-License-Identifier: (BSD-3-Clause) @@ -37,7 +37,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 ## # CMake option -DENABLE_FORCEINLINE_RECURSIVE=Off used to speed up compile diff --git a/scripts/lc-builds/toss3_mvapich2_gcc.sh b/scripts/lc-builds/toss3_mvapich2_gcc.sh index 654f9624f..8c9e0662c 100755 --- a/scripts/lc-builds/toss3_mvapich2_gcc.sh +++ b/scripts/lc-builds/toss3_mvapich2_gcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -30,7 +30,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/toss3_pgi.sh b/scripts/lc-builds/toss3_pgi.sh index cd778d5fe..9967dd769 100755 --- a/scripts/lc-builds/toss3_pgi.sh +++ b/scripts/lc-builds/toss3_pgi.sh @@ -29,7 +29,7 @@ echo rm -rf build_${BUILD_SUFFIX} 2>/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.20.2 cmake \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/scripts/lc-builds/toss4_amdclang.sh b/scripts/lc-builds/toss4_amdclang.sh index 4b063be04..7d2de5397 100755 --- a/scripts/lc-builds/toss4_amdclang.sh +++ b/scripts/lc-builds/toss4_amdclang.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and RAJA project contributors. See the RAJA/LICENSE file for details. 
+# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### @@ -15,7 +15,7 @@ if [[ $# -lt 2 ]]; then echo " 3...) optional arguments to cmake" echo echo "For example: " - echo " toss4_amdclang.sh 4.1.0 gfx906" + echo " toss4_amdclang.sh 5.1.0 gfx906" exit fi @@ -49,7 +49,7 @@ rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} -module load cmake/3.14.5 +module load cmake/3.23.1 # unload rocm to avoid configuration problems where the loaded rocm and COMP_VER # are inconsistent causing the rocprim from the module to be used unexpectedly @@ -64,6 +64,8 @@ cmake \ -DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang \ -DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang++ \ -DCMAKE_HIP_ARCHITECTURES="${COMP_ARCH}" \ + -DGPU_TARGETS="${COMP_ARCH}" \ + -DAMDGPU_TARGETS="${COMP_ARCH}" \ -DBLT_CXX_STD=c++14 \ -C ${RAJA_HOSTCONFIG} \ -DENABLE_HIP=ON \ diff --git a/scripts/lc-builds/toss4_cray-mpich_amdclang.sh b/scripts/lc-builds/toss4_cray-mpich_amdclang.sh new file mode 100755 index 000000000..614f2caec --- /dev/null +++ b/scripts/lc-builds/toss4_cray-mpich_amdclang.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [[ $# -lt 3 ]]; then + echo + echo "You must pass 3 or more arguments to the script (in this order): " + echo " 1) cray-mpich compiler version number" + echo " 2) HIP compiler version number" + echo " 3) HIP compute architecture" + echo " 4...) 
optional arguments to cmake" + echo + echo "For example: " + echo " toss4_cray-mpich_amdclang.sh 8.1.14 4.1.0 gfx906" + exit +fi + +MPI_VER=$1 +COMP_VER=$2 +COMP_ARCH=$3 +shift 3 + +HOSTCONFIG="hip_3_X" + +if [[ ${COMP_VER} == 4.* ]] +then +##HIP_CLANG_FLAGS="-mllvm -amdgpu-fixed-function-abi=1" + HOSTCONFIG="hip_4_link_X" +elif [[ ${COMP_VER} == 3.* ]] +then + HOSTCONFIG="hip_3_X" +else + echo "Unknown hip version, using ${HOSTCONFIG} host-config" +fi + +# if [[ ${COMP_ARCH} == gfx90a ]] +# then + # note that unsafe atomics require use of coarse grain memory +##HIP_CLANG_FLAGS="-munsafe-fp-atomics" +# fi + +BUILD_SUFFIX=lc_toss4-cray-mpich-${MPI_VER}-amdclang-${COMP_VER}-${COMP_ARCH} +RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/toss4/${HOSTCONFIG}.cmake + +echo +echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo + +rm -rf build_${BUILD_SUFFIX} >/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + + +module load cmake/3.23.1 + +# unload rocm to avoid configuration problems where the loaded rocm and COMP_VER +# are inconsistent causing the rocprim from the module to be used unexpectedly +module unload rocm + + +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DMPI_C_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpiamdclang" \ + -DMPI_CXX_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpiamdclang++" \ + -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \ + -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \ + -DHIP_PATH=/opt/rocm-${COMP_VER}/llvm/bin \ + -DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang \ + -DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang++ \ + -DCMAKE_HIP_ARCHITECTURES="${COMP_ARCH}" \ + -DGPU_TARGETS="${COMP_ARCH}" \ + -DAMDGPU_TARGETS="${COMP_ARCH}" \ + -DBLT_CXX_STD=c++14 \ + -C ${RAJA_HOSTCONFIG} \ + -DENABLE_MPI=ON \ + -DENABLE_HIP=ON \ + 
-DENABLE_OPENMP=OFF \ + -DENABLE_CUDA=OFF \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. + +echo +echo "***********************************************************************" +echo +echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA" +echo +echo " Please note that you have to have a consistent build environment" +echo " when you make RAJA as cmake may reconfigure; unload the rocm module" +echo " or load the appropriate rocm module (${COMP_VER}) when building." +echo +echo " module unload rocm" +echo " srun -n1 make" +echo +echo " Please note that cray-mpich requires libmodules.so.1 from cce to run." +echo " Until this is handled transparently in the build system you may add " +echo " cce to your LD_LIBRARY_PATH." +echo +echo " export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/cce-tce/cce-13.0.2/cce/x86_64/lib/" +echo +echo "***********************************************************************" diff --git a/scripts/make_release_tarball.sh b/scripts/make_release_tarball.sh index 6b0a3b804..cd86cdc80 100755 --- a/scripts/make_release_tarball.sh +++ b/scripts/make_release_tarball.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py new file mode 100644 index 000000000..4e25af802 --- /dev/null +++ b/scripts/spack_packages/camp/package.py @@ -0,0 +1,98 @@ +# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import glob + +from spack.package import * + + +def hip_repair_options(options, spec): + # there is only one dir like this, but the version component is unknown + options.append( + "-DHIP_CLANG_INCLUDE_PATH=" + + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0] + ) + + +def hip_repair_cache(options, spec): + # there is only one dir like this, but the version component is unknown + options.append( + cmake_cache_path( + "HIP_CLANG_INCLUDE_PATH", + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0], + ) + ) + + +class Camp(CMakePackage, CudaPackage, ROCmPackage): + """ + Compiler agnostic metaprogramming library providing concepts, + type operations and tuples for C++ and cuda + """ + + homepage = "https://github.com/LLNL/camp" + git = "https://github.com/LLNL/camp.git" + url = "https://github.com/LLNL/camp/archive/v0.1.0.tar.gz" + + maintainers = ["trws"] + + version("main", branch="main", submodules="True") + version("2022.10.0", sha256="3561c3ef00bbcb61fe3183c53d49b110e54910f47e7fc689ad9ccce57e55d6b8") + version("2022.03.2", sha256="bc4aaeacfe8f2912e28f7a36fc731ab9e481bee15f2c6daf0cb208eed3f201eb") + version("2022.03.0", sha256="e9090d5ee191ea3a8e36b47a8fe78f3ac95d51804f1d986d931e85b8f8dad721") + version("0.3.0", sha256="129431a049ca5825443038ad5a37a86ba6d09b2618d5fe65d35f83136575afdb") + version("0.2.3", sha256="58a0f3bd5eadb588d7dc83f3d050aff8c8db639fc89e8d6553f9ce34fc2421a7") + version("0.2.2", sha256="194d38b57e50e3494482a7f94940b27f37a2bee8291f2574d64db342b981d819") + version("0.1.0", sha256="fd4f0f2a60b82a12a1d9f943f8893dc6fe770db493f8fae5ef6f7d0c439bebcc") + + # TODO: figure out gtest dependency and then set this default True. 
+ variant("tests", default=False, description="Build tests") + variant("openmp", default=False, description="Build with OpenMP support") + + depends_on("cub", when="+cuda") + + depends_on("blt") + + def cmake_args(self): + spec = self.spec + + options = [] + + options.append("-DBLT_SOURCE_DIR={0}".format(spec["blt"].prefix)) + + if "+cuda" in spec: + options.extend([ + "-DENABLE_CUDA=ON", + "-DCUDA_TOOLKIT_ROOT_DIR=%s" % (spec["cuda"].prefix) + ]) + + if not spec.satisfies("cuda_arch=none"): + cuda_arch = spec.variants["cuda_arch"].value + options.append("-DCMAKE_CUDA_ARCHITECTURES={0}".format(cuda_arch[0])) + options.append("-DCUDA_ARCH=sm_{0}".format(cuda_arch[0])) + flag = "-arch sm_{0}".format(cuda_arch[0]) + options.append("-DCMAKE_CUDA_FLAGS:STRING={0}".format(flag)) + else: + options.append("-DENABLE_CUDA=OFF") + + if "+rocm" in spec: + options.extend([ + "-DENABLE_HIP=ON", + "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix) + ]) + + hip_repair_options(options, spec) + + archs = self.spec.variants["amdgpu_target"].value + if archs != "none": + arch_str = ",".join(archs) + options.append("-DHIP_HIPCC_FLAGS=--amdgpu-target={0}".format(arch_str)) + else: + options.append("-DENABLE_HIP=OFF") + + options.append(self.define_from_variant("ENABLE_OPENMP", "openmp")) + options.append(self.define_from_variant("ENABLE_TESTS", "tests")) + + return options diff --git a/scripts/spack_packages/hip/package.py b/scripts/spack_packages/hip/package.py deleted file mode 100644 index f99d26dc6..000000000 --- a/scripts/spack_packages/hip/package.py +++ /dev/null @@ -1,55 +0,0 @@ -from spack import * - - -class Hip(CMakePackage): - """HIP is a C++ Runtime API and Kernel Language that allows developers to - create portable applications for AMD and NVIDIA GPUs from - single source code.""" - - homepage = "https://github.com/ROCm-Developer-Tools/HIP" - url = "https://github.com/ROCm-Developer-Tools/HIP/archive/refs/tags/rocm-4.0.0.tar.gz" - - maintainers = ['srekolam', 
'arjun-raj-kuppala'] - - version('4.1.0', sha256='25ad58691456de7fd9e985629d0ed775ba36a2a0e0b21c086bd96ba2fb0f7ed1') - version('4.0.0', sha256='0082c402f890391023acdfd546760f41cb276dffc0ffeddc325999fd2331d4e8') - - depends_on('cmake@3:', type='build') - depends_on('perl@5.10:', type=('build', 'run')) - depends_on('mesa~llvm@18.3:') - - for ver in ['4.0.0', '4.1.0']: - depends_on('rocclr@' + ver, type='build', when='@' + ver) - depends_on('hsakmt-roct@' + ver, type='build', when='@' + ver) - depends_on('hsa-rocr-dev@' + ver, type='link', when='@' + ver) - depends_on('comgr@' + ver, type='build', when='@' + ver) - depends_on('llvm-amdgpu@' + ver, type='build', when='@' + ver) - depends_on('rocm-device-libs@' + ver, type='build', when='@' + ver) - depends_on('rocminfo@' + ver, type='build', when='@' + ver) - - def setup_dependent_package(self, module, dependent_spec): - self.spec.hipcc = join_path(self.prefix.bin, 'hipcc') - - @run_before('install') - def filter_sbang(self): - perl = self.spec['perl'].command - kwargs = {'ignore_absent': False, 'backup': False, 'string': False} - - with working_dir('bin'): - match = '^#!/usr/bin/perl' - substitute = "#!{perl}".format(perl=perl) - files = [ - 'hipify-perl', 'hipcc', 'extractkernel', - 'hipconfig', 'hipify-cmakefile' - ] - filter_file(match, substitute, *files, **kwargs) - - def cmake_args(self): - args = [ - '-DHIP_COMPILER=clang', - '-DHIP_PLATFORM=rocclr', - '-DHSA_PATH={0}'.format(self.spec['hsa-rocr-dev'].prefix), - '-DLIBROCclr_STATIC_DIR={0}/lib'.format(self.spec['rocclr'].prefix) - ] - return args - diff --git a/scripts/spack_packages/raja_perf/package.py b/scripts/spack_packages/raja_perf/package.py index c0ba13602..86508ef9c 100644 --- a/scripts/spack_packages/raja_perf/package.py +++ b/scripts/spack_packages/raja_perf/package.py @@ -5,6 +5,7 @@ from spack import * +from spack.pkg.builtin.camp import hip_repair_cache import socket import os @@ -59,6 +60,7 @@ class RajaPerf(CMakePackage, CudaPackage, 
ROCmPackage): version('develop', branch='develop', submodules='True') version('main', branch='main', submodules='True') + version('0.12.0', tag='v0.12.0', submodules="True") version('0.11.0', tag='v0.11.0', submodules="True") version('0.10.0', tag='v0.10.0', submodules="True") version('0.9.0', tag='v0.9.0', submodules="True") @@ -77,9 +79,19 @@ class RajaPerf(CMakePackage, CudaPackage, ROCmPackage): variant('tests', default='basic', values=('none', 'basic', 'benchmarks'), multi=False, description='Tests to run') - depends_on('cmake@3.9:', type='build') - depends_on('blt@0.4.1', type='build', when='@main') - depends_on('blt@0.4.1:', type='build') + depends_on("blt") + depends_on("blt@0.5.0:", type="build", when="@0.12.0:") + depends_on("blt@0.4.1:", type="build", when="@0.11.0:") + depends_on("blt@0.4.0:", type="build", when="@0.8.0:") + depends_on("blt@0.3.0:", type="build", when="@:0.7.0") + + depends_on("cmake@3.20:", when="@0.12.0:", type="build") + depends_on("cmake@3.23:", when="@0.12.0: +rocm", type="build") + depends_on("cmake@3.14:", when="@:0.12.0", type="build") + + depends_on("llvm-openmp", when="+openmp %apple-clang") + + depends_on("rocprim", when="+rocm") conflicts('+openmp', when='+rocm') conflicts('~openmp', when='+openmp_target', msg='OpenMP target requires OpenMP') @@ -217,10 +229,11 @@ def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): gcc_name_regex = re.compile(".*gcc-name.*") using_toolchain = list(filter(gcc_toolchain_regex.match, spec.compiler_flags['cxxflags'])) + if(using_toolchain): gcc_toolchain_path = gcc_toolchain_regex.match(using_toolchain[0]) using_gcc_name = list(filter(gcc_name_regex.match, spec.compiler_flags['cxxflags'])) - compilers_using_toolchain = ["pgi", "xl", "icpc"] + compilers_using_toolchain = ["pgi", "xl", "icpc", "clang"] if any(compiler in cpp_compiler for compiler in compilers_using_toolchain): if using_toolchain or using_gcc_name: cfg.write(cmake_cache_entry("BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE", @@ 
-265,6 +278,12 @@ def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): cuda_release_flags = "-O3 -Xcompiler -Ofast -Xcompiler -finline-functions" cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast -Xcompiler -finline-functions" cuda_debug_flags = "-O0 -g -Xcompiler -O0 -Xcompiler -finline-functions" + + if (using_toolchain): + gcc_prefix = gcc_toolchain_path.group(1) + cuda_release_flags += " -Xcompiler --gcc-toolchain={0}".format(gcc_prefix) + cuda_reldebinf_flags += " -Xcompiler --gcc-toolchain={0}".format(gcc_prefix) + cuda_debug_flags += " -Xcompiler --gcc-toolchain={0}".format(gcc_prefix) cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS_RELEASE", cuda_release_flags)) cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS_RELWITHDEBINFO", cuda_reldebinf_flags)) @@ -273,6 +292,7 @@ def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): if not spec.satisfies('cuda_arch=none'): cuda_arch = spec.variants['cuda_arch'].value cfg.write(cmake_cache_string("CUDA_ARCH", 'sm_{0}'.format(cuda_arch[0]))) + cfg.write(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", '{0}'.format(cuda_arch[0]))) else: cfg.write(cmake_cache_option("ENABLE_CUDA", False)) @@ -283,7 +303,6 @@ def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): cfg.write("#------------------{0}\n\n".format("-" * 60)) cfg.write(cmake_cache_option("ENABLE_HIP", True)) - cfg.write(cmake_cache_option("ENABLE_TESTS", not 'tests=none' in spec or self.run_tests)) hip_root = spec['hip'].prefix rocm_root = hip_root + "/.." 
@@ -321,9 +340,11 @@ def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): cfg.write(cmake_cache_option("ENABLE_OPENMP_TARGET", "+openmp_target" in spec)) if "+openmp_target" in spec: if ('%xl' in spec): - cfg.write(cmake_cache_string("OpenMP_CXX_FLAGS", "-qsmp=omp;-qoffload;-qnoeh;-qalias=noansi")) + cfg.write(cmake_cache_string("BLT_OPENMP_COMPILE_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi")) + cfg.write(cmake_cache_string("BLT_OPENMP_LINK_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi")) if ('%clang' in spec): - cfg.write(cmake_cache_string("OpenMP_CXX_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda")) + cfg.write(cmake_cache_string("BLT_OPENMP_COMPILE_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda")) + cfg.write(cmake_cache_string("BLT_OPENMP_LINK_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda")) cfg.write(cmake_cache_option("ENABLE_CUDA", False)) diff --git a/scripts/travis_build_and_test.sh b/scripts/travis_build_and_test.sh index d53c88550..5ca692a49 100755 --- a/scripts/travis_build_and_test.sh +++ b/scripts/travis_build_and_test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # diff --git a/scripts/ubuntu-builds/ubuntu_clang.sh b/scripts/ubuntu-builds/ubuntu_clang.sh index 664685f42..68b722774 100755 --- a/scripts/ubuntu-builds/ubuntu_clang.sh +++ b/scripts/ubuntu-builds/ubuntu_clang.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. 
# # SPDX-License-Identifier: (BSD-3-Clause) diff --git a/scripts/ubuntu-builds/ubuntu_gcc.sh b/scripts/ubuntu-builds/ubuntu_gcc.sh index f0eb7fcf7..04c57fce7 100755 --- a/scripts/ubuntu-builds/ubuntu_gcc.sh +++ b/scripts/ubuntu-builds/ubuntu_gcc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJAPerf/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) diff --git a/scripts/update_copyright.sh b/scripts/update_copyright.sh index 31a658efa..d3bdeb170 100755 --- a/scripts/update_copyright.sh +++ b/scripts/update_copyright.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # @@ -11,7 +11,7 @@ #============================================================================= # Change the copyright date in all files that contain the text # "the RAJAPerf/LICENSE file", which is part of the copyright statement -# at the top of each RAJA file. We use this to distinguish RAJA files from +# at the top of each RAJAPerf file. We use this to distinguish RAJAPerf files from files # that we do not own (e.g., other repos included as submodules), which we do # not want to modify. Note that this file and *.git files are omitted # as well.
@@ -46,14 +46,18 @@ for i in `cat files2change` do echo $i cp $i $i.sed.bak - sed "s/Copyright (c) 2017-21/Copyright (c) 2017-22/" $i.sed.bak > $i + sed "s/Copyright (c) 2017-22/Copyright (c) 2017-23/" $i.sed.bak > $i done -for i in LICENSE RELEASE README.md +echo LICENSE +cp LICENSE LICENSE.sed.bak +sed "s/Copyright (c) 2017-2022/Copyright (c) 2017-2023/" LICENSE.sed.bak > LICENSE + +for i in RELEASE README.md do echo $i cp $i $i.sed.bak - sed "s/2017-2021/2017-2022/" $i.sed.bak > $i + sed "s/2017-22/2017-23/" $i.sed.bak > $i done #============================================================================= diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bc1bf6b77..5f93d967b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # @@ -9,30 +9,39 @@ include_directories(.) 
add_subdirectory(common) -add_subdirectory(apps) add_subdirectory(basic) +add_subdirectory(basic-kokkos) +add_subdirectory(apps) add_subdirectory(lcals) +add_subdirectory(lcals-kokkos) add_subdirectory(polybench) add_subdirectory(stream) +add_subdirectory(stream-kokkos) add_subdirectory(algorithm) set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS common apps basic + basic-kokkos lcals + lcals-kokkos polybench stream + stream-kokkos algorithm) list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) -if(ENABLE_TARGET_OPENMP) +if(RAJA_ENABLE_TARGET_OPENMP) remove_definitions(-DRUN_RAJA_SEQ -DRUN_OPENMP ) blt_add_executable( NAME raja-perf-omptarget.exe SOURCES RAJAPerfSuiteDriver.cpp apps/AppsData.cpp + apps/CONVECTION3DPA.cpp + apps/CONVECTION3DPA-Seq.cpp + apps/CONVECTION3DPA-OMPTarget.cpp apps/DEL_DOT_VEC_2D.cpp apps/DEL_DOT_VEC_2D-Seq.cpp apps/DEL_DOT_VEC_2D-OMPTarget.cpp @@ -222,8 +231,17 @@ blt_add_executable( algorithm/REDUCE_SUM.cpp algorithm/REDUCE_SUM-Seq.cpp algorithm/REDUCE_SUM-OMPTarget.cpp - DEPENDS_ON ${RAJA_PERFSUITE_DEPENDS} + algorithm/MEMSET.cpp + algorithm/MEMSET-Seq.cpp + algorithm/MEMSET-OMPTarget.cpp + algorithm/MEMCPY.cpp + algorithm/MEMCPY-Seq.cpp + algorithm/MEMCPY-OMPTarget.cpp + DEPENDS_ON ${RAJA_PERFSUITE_EXECUTABLE_DEPENDS} ) +install( TARGETS raja-perf-omptarget.exe + RUNTIME DESTINATION bin + ) else() blt_add_executable( @@ -231,4 +249,7 @@ blt_add_executable( SOURCES RAJAPerfSuiteDriver.cpp DEPENDS_ON ${RAJA_PERFSUITE_EXECUTABLE_DEPENDS} ) +install( TARGETS raja-perf.exe + RUNTIME DESTINATION bin + ) endif() diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index d423dcff9..7b3317f65 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -1,11 +1,15 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA 
Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifdef RUN_KOKKOS +#include <Kokkos_Core.hpp> +#endif + #include "common/Executor.hpp" #include <iostream> @@ -24,6 +28,9 @@ int main( int argc, char** argv ) MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl; #endif +#ifdef RUN_KOKKOS + Kokkos::initialize(argc, argv); +#endif // STEP 1: Create suite executor object rajaperf::Executor executor(argc, argv); @@ -43,6 +50,9 @@ int main( int argc, char** argv ) rajaperf::getCout() << "\n\nDONE!!!...." << std::endl; +#ifdef RUN_KOKKOS + Kokkos::finalize(); +#endif #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Finalize(); #endif diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index ec0fcbf74..54334242e 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. 
# @@ -13,6 +13,7 @@ blt_add_library( SCAN-Hip.cpp SCAN-Cuda.cpp SCAN-OMP.cpp + SCAN-OMPTarget.cpp SORT.cpp SORT-Seq.cpp SORT-Hip.cpp @@ -29,5 +30,17 @@ blt_add_library( REDUCE_SUM-Cuda.cpp REDUCE_SUM-OMP.cpp REDUCE_SUM-OMPTarget.cpp + MEMSET.cpp + MEMSET-Seq.cpp + MEMSET-Hip.cpp + MEMSET-Cuda.cpp + MEMSET-OMP.cpp + MEMSET-OMPTarget.cpp + MEMCPY.cpp + MEMCPY-Seq.cpp + MEMCPY-Hip.cpp + MEMCPY-Cuda.cpp + MEMCPY-OMP.cpp + MEMCPY-OMPTarget.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/algorithm/MEMCPY-Cuda.cpp b/src/algorithm/MEMCPY-Cuda.cpp new file mode 100644 index 000000000..324fe5d91 --- /dev/null +++ b/src/algorithm/MEMCPY-Cuda.cpp @@ -0,0 +1,218 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_CUDA) + +#include "common/CudaDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + +#define MEMCPY_DATA_SETUP_CUDA \ + allocAndInitCudaDeviceData(x, m_x, iend); \ + allocAndInitCudaDeviceData(y, m_y, iend); + +#define MEMCPY_DATA_TEARDOWN_CUDA \ + getCudaDeviceData(m_y, y, iend); \ + deallocCudaDeviceData(x); \ + deallocCudaDeviceData(y); + +template < size_t block_size > +__launch_bounds__(block_size) +__global__ void memcpy(Real_ptr x, Real_ptr y, + Index_type iend) +{ + Index_type i = blockIdx.x * block_size + threadIdx.x; + if ( i < iend ) { + MEMCPY_BODY; + } +} + + +void MEMCPY::runCudaVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + if ( vid == Base_CUDA ) { + + 
MEMCPY_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + cudaErrchk( cudaMemcpyAsync(MEMCPY_STD_ARGS, cudaMemcpyDefault, 0) ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_CUDA; + + } else if ( vid == RAJA_CUDA ) { + + MEMCPY_DATA_SETUP_CUDA; + + camp::resources::Cuda res = camp::resources::Cuda::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memcpy(MEMCPY_STD_ARGS); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_CUDA; + + } else { + + getCout() << "\n MEMCPY : Unknown Cuda variant id = " << vid << std::endl; + + } + +} + +template < size_t block_size > +void MEMCPY::runCudaVariantBlock(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + if ( vid == Base_CUDA ) { + + MEMCPY_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + memcpy<<>>( + x, y, iend ); + cudaErrchk( cudaGetLastError() ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_CUDA; + + } else if ( vid == Lambda_CUDA ) { + + MEMCPY_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + auto memcpy_lambda = [=] __device__ (Index_type i) { + MEMCPY_BODY; + }; + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + lambda_cuda_forall<<>>( + ibegin, iend, memcpy_lambda ); + cudaErrchk( cudaGetLastError() ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_CUDA; + + } else if ( vid == RAJA_CUDA ) { + + MEMCPY_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall< RAJA::cuda_exec >( + RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_CUDA; + + } else { + + getCout() << "\n MEMCPY : Unknown Cuda 
variant id = " << vid << std::endl; + + } + +} + +void MEMCPY::runCudaVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_CUDA || vid == RAJA_CUDA) { + + if (tune_idx == t) { + + runCudaVariantLibrary(vid); + + } + + t += 1; + + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + if (tune_idx == t) { + + runCudaVariantBlock(vid); + + } + + t += 1; + + } + + }); +} + +void MEMCPY::setCudaTuningDefinitions(VariantID vid) +{ + if (vid == Base_CUDA || vid == RAJA_CUDA) { + addVariantTuningName(vid, "library"); + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + addVariantTuningName(vid, "block_"+std::to_string(block_size)); + + } + + }); +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // RAJA_ENABLE_CUDA diff --git a/src/algorithm/MEMCPY-Hip.cpp b/src/algorithm/MEMCPY-Hip.cpp new file mode 100644 index 000000000..b457d70dc --- /dev/null +++ b/src/algorithm/MEMCPY-Hip.cpp @@ -0,0 +1,222 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_HIP) + +#include "common/HipDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + +#define MEMCPY_DATA_SETUP_HIP \ + allocAndInitHipDeviceData(x, m_x, iend); \ + allocAndInitHipDeviceData(y, m_y, iend); + +#define MEMCPY_DATA_TEARDOWN_HIP \ + getHipDeviceData(m_y, y, iend); \ + deallocHipDeviceData(x); \ + deallocHipDeviceData(y); + +template < size_t block_size > +__launch_bounds__(block_size) +__global__ void memcpy(Real_ptr x, Real_ptr y, + Index_type iend) +{ + Index_type i = blockIdx.x * block_size + threadIdx.x; + if ( i < iend ) { + MEMCPY_BODY; + } +} + + +void MEMCPY::runHipVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + if ( vid == Base_HIP ) { + + MEMCPY_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + hipErrchk( hipMemcpyAsync(MEMCPY_STD_ARGS, hipMemcpyDefault, 0) ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_HIP; + + } else if ( vid == RAJA_HIP ) { + + MEMCPY_DATA_SETUP_HIP; + + camp::resources::Hip res = camp::resources::Hip::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memcpy(MEMCPY_STD_ARGS); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_HIP; + + } else { + + getCout() << "\n MEMCPY : Unknown Hip variant id = " << vid << std::endl; + + } + +} + +template < size_t block_size > +void MEMCPY::runHipVariantBlock(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + if ( vid == Base_HIP ) { + + MEMCPY_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < 
run_reps; ++irep) { + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + hipLaunchKernelGGL( (memcpy), + dim3(grid_size), dim3(block_size), 0, 0, + x, y, iend ); + hipErrchk( hipGetLastError() ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_HIP; + + } else if ( vid == Lambda_HIP ) { + + MEMCPY_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + auto memcpy_lambda = [=] __device__ (Index_type i) { + MEMCPY_BODY; + }; + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + hipLaunchKernelGGL((lambda_hip_forall), + grid_size, block_size, 0, 0, + ibegin, iend, memcpy_lambda); + hipErrchk( hipGetLastError() ); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_HIP; + + } else if ( vid == RAJA_HIP ) { + + MEMCPY_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall< RAJA::hip_exec >( + RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_HIP; + + } else { + + getCout() << "\n MEMCPY : Unknown Hip variant id = " << vid << std::endl; + + } + +} + +void MEMCPY::runHipVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_HIP || vid == RAJA_HIP) { + + if (tune_idx == t) { + + runHipVariantLibrary(vid); + + } + + t += 1; + + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + if (tune_idx == t) { + + runHipVariantBlock(vid); + + } + + t += 1; + + } + + }); + +} + +void MEMCPY::setHipTuningDefinitions(VariantID vid) +{ + if (vid == Base_HIP || vid == RAJA_HIP) { + addVariantTuningName(vid, "library"); + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + addVariantTuningName(vid, "block_"+std::to_string(block_size)); + + } + + 
}); + +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // RAJA_ENABLE_HIP diff --git a/src/algorithm/MEMCPY-OMP.cpp b/src/algorithm/MEMCPY-OMP.cpp new file mode 100644 index 000000000..55b63afd6 --- /dev/null +++ b/src/algorithm/MEMCPY-OMP.cpp @@ -0,0 +1,98 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMCPY::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_OpenMP : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp parallel for + for (Index_type i = ibegin; i < iend; ++i ) { + MEMCPY_BODY; + } + + } + stopTimer(); + + break; + } + + case Lambda_OpenMP : { + + auto memcpy_lambda = [=](Index_type i) { + MEMCPY_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp parallel for + for (Index_type i = ibegin; i < iend; ++i ) { + memcpy_lambda(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_OpenMP : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } + +#else + RAJA_UNUSED_VAR(vid); 
+#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMCPY-OMPTarget.cpp b/src/algorithm/MEMCPY-OMPTarget.cpp new file mode 100644 index 000000000..1e8ce3554 --- /dev/null +++ b/src/algorithm/MEMCPY-OMPTarget.cpp @@ -0,0 +1,96 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_TARGET_OPENMP) + +#include "common/OpenMPTargetDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + // + // Define threads per team for target execution + // + const size_t threads_per_team = 256; + +#define MEMCPY_DATA_SETUP_OMP_TARGET \ + int hid = omp_get_initial_device(); \ + int did = omp_get_default_device(); \ +\ + allocAndInitOpenMPDeviceData(x, m_x, iend, did, hid); \ + allocAndInitOpenMPDeviceData(y, m_y, iend, did, hid); + +#define MEMCPY_DATA_TEARDOWN_OMP_TARGET \ + getOpenMPDeviceData(m_y, y, iend, hid, did); \ + deallocOpenMPDeviceData(x, did); \ + deallocOpenMPDeviceData(y, did); + + +void MEMCPY::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + if ( vid == Base_OpenMPTarget ) { + + MEMCPY_DATA_SETUP_OMP_TARGET + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp target is_device_ptr(x, y) device( did ) + #pragma omp teams distribute parallel for \ + thread_limit(threads_per_team) schedule(static, 1) + for (Index_type i = ibegin; i < iend; ++i ) { + MEMCPY_BODY; + } + + } + 
stopTimer(); + + MEMCPY_DATA_TEARDOWN_OMP_TARGET + + } else if ( vid == RAJA_OpenMPTarget ) { + + MEMCPY_DATA_SETUP_OMP_TARGET + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall>( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + MEMCPY_DATA_TEARDOWN_OMP_TARGET + + } else { + getCout() << "\n MEMCPY : Unknown OMP Target variant id = " << vid << std::endl; + } + +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // RAJA_ENABLE_TARGET_OPENMP diff --git a/src/algorithm/MEMCPY-Seq.cpp b/src/algorithm/MEMCPY-Seq.cpp new file mode 100644 index 000000000..ab8d67248 --- /dev/null +++ b/src/algorithm/MEMCPY-Seq.cpp @@ -0,0 +1,174 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMCPY::runSeqVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::memcpy(MEMCPY_STD_ARGS); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case RAJA_Seq : { + + camp::resources::Host res = camp::resources::Host::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memcpy(MEMCPY_STD_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + 
} + + } + +} + +void MEMCPY::runSeqVariantDefault(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + MEMCPY_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto memcpy_lambda = [=](Index_type i) { + MEMCPY_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + memcpy_lambda(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } + +} + +void MEMCPY::runSeqVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_Seq || vid == RAJA_Seq) { + + if (tune_idx == t) { + + runSeqVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runSeqVariantDefault(vid); + + } + + t += 1; +} + +void MEMCPY::setSeqTuningDefinitions(VariantID vid) +{ + if (vid == Base_Seq || vid == RAJA_Seq) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp new file mode 100644 index 000000000..fcc1af34f --- /dev/null +++ b/src/algorithm/MEMCPY.cpp @@ -0,0 +1,79 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/DataUtils.hpp" + +namespace rajaperf +{ +namespace algorithm +{ + +// Constructor: registers default sizes, per-rep cost metrics and the variants this kernel defines. +MEMCPY::MEMCPY(const RunParams& params) + : KernelBase(rajaperf::Algorithm_MEMCPY, params) +{ + setDefaultProblemSize(1000000); + setDefaultReps(100); + + setActualProblemSize( getTargetProblemSize() ); // set before the metrics below, which query getActualProblemSize() + + setItsPerRep( getActualProblemSize() ); + setKernelsPerRep(1); + setBytesPerRep( (1*sizeof(Real_type) + 1*sizeof(Real_type)) * getActualProblemSize() ); // two Real_type values per element + setFLOPsPerRep(0); // no floating-point operations are counted + + setUsesFeature(Forall); + + setVariantDefined( Base_Seq ); + setVariantDefined( Lambda_Seq ); + setVariantDefined( RAJA_Seq ); + + setVariantDefined( Base_OpenMP ); + setVariantDefined( Lambda_OpenMP ); + setVariantDefined( RAJA_OpenMP ); + + setVariantDefined( Base_OpenMPTarget ); + setVariantDefined( RAJA_OpenMPTarget ); + + setVariantDefined( Base_CUDA ); + setVariantDefined( Lambda_CUDA ); + setVariantDefined( RAJA_CUDA ); + + setVariantDefined( Base_HIP ); + setVariantDefined( Lambda_HIP ); + setVariantDefined( RAJA_HIP ); +} + +MEMCPY::~MEMCPY() +{ +} + +void MEMCPY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + allocAndInitDataConst(m_x, getActualProblemSize(), 0.0, vid); // m_x is given the constant 0.0 + allocAndInitDataConst(m_y, getActualProblemSize(), -1.234567e89, vid); // m_y gets a different constant; presumably so a skipped copy shows up in the checksum - TODO confirm +} + +void MEMCPY::updateChecksum(VariantID vid, size_t tune_idx) +{ + checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize()); // accumulated over m_y only +} + +void MEMCPY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + (void) vid; // vid is not otherwise used in this function + deallocData(m_x); + deallocData(m_y); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp new file mode 100644 index 000000000..9fa46ae9e --- /dev/null +++ b/src/algorithm/MEMCPY.hpp @@ -0,0 +1,83 
@@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// MEMCPY kernel reference implementation: +/// +/// for (Index_type i = ibegin; i < iend; ++i ) { +/// y[i] = x[i] ; +/// } +/// + +#ifndef RAJAPerf_Algorithm_MEMCPY_HPP +#define RAJAPerf_Algorithm_MEMCPY_HPP + +#define MEMCPY_DATA_SETUP \ + Real_ptr x = m_x; \ + Real_ptr y = m_y; \ + +#define MEMCPY_STD_ARGS \ + y + ibegin, x + ibegin, (iend-ibegin)*sizeof(Real_type) + +#define MEMCPY_BODY \ + y[i] = x[i]; + + +#include "common/KernelBase.hpp" + +namespace rajaperf +{ +class RunParams; + +namespace algorithm +{ + +class MEMCPY : public KernelBase +{ +public: + + MEMCPY(const RunParams& params); + + ~MEMCPY(); + + void setUp(VariantID vid, size_t tune_idx); + void updateChecksum(VariantID vid, size_t tune_idx); + void tearDown(VariantID vid, size_t tune_idx); + + void runSeqVariant(VariantID vid, size_t tune_idx); + void runOpenMPVariant(VariantID vid, size_t tune_idx); + void runCudaVariant(VariantID vid, size_t tune_idx); + void runHipVariant(VariantID vid, size_t tune_idx); + void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + + void setSeqTuningDefinitions(VariantID vid); + void setCudaTuningDefinitions(VariantID vid); + void setHipTuningDefinitions(VariantID vid); + void runSeqVariantDefault(VariantID vid); + void runSeqVariantLibrary(VariantID vid); + + template < size_t block_size > + void runCudaVariantBlock(VariantID vid); + void runCudaVariantLibrary(VariantID vid); + + template < size_t block_size > + void runHipVariantBlock(VariantID vid); + void runHipVariantLibrary(VariantID vid); + +private: + static const size_t default_gpu_block_size = 256; + using 
gpu_block_sizes_type = gpu_block_size::make_list_type; + + Real_ptr m_x; + Real_ptr m_y; +}; + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/algorithm/MEMSET-Cuda.cpp b/src/algorithm/MEMSET-Cuda.cpp new file mode 100644 index 000000000..e5f33b84a --- /dev/null +++ b/src/algorithm/MEMSET-Cuda.cpp @@ -0,0 +1,218 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_CUDA) + +#include "common/CudaDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + +#define MEMSET_DATA_SETUP_CUDA \ + allocAndInitCudaDeviceData(x, m_x, iend); + +#define MEMSET_DATA_TEARDOWN_CUDA \ + getCudaDeviceData(m_x, x, iend); \ + deallocCudaDeviceData(x); + +template < size_t block_size > +__launch_bounds__(block_size) +__global__ void memset(Real_ptr x, Real_type val, + Index_type iend) +{ + Index_type i = blockIdx.x * block_size + threadIdx.x; + if ( i < iend ) { + MEMSET_BODY; + } +} + + +void MEMSET::runCudaVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + if ( vid == Base_CUDA ) { + + MEMSET_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + cudaErrchk( cudaMemsetAsync(MEMSET_STD_ARGS, 0) ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_CUDA; + + } else if ( vid == RAJA_CUDA ) { + + MEMSET_DATA_SETUP_CUDA; + + camp::resources::Cuda res = camp::resources::Cuda::get_default(); + + startTimer(); + for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memset(MEMSET_STD_ARGS); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_CUDA; + + } else { + + getCout() << "\n MEMSET : Unknown Cuda variant id = " << vid << std::endl; + + } + +} + +template < size_t block_size > +void MEMSET::runCudaVariantBlock(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + if ( vid == Base_CUDA ) { + + MEMSET_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + memset<<>>( x, + val, + iend ); + cudaErrchk( cudaGetLastError() ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_CUDA; + + } else if ( vid == Lambda_CUDA ) { + + MEMSET_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + auto memset_lambda = [=] __device__ (Index_type i) { + MEMSET_BODY; + }; + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + lambda_cuda_forall<<>>( + ibegin, iend, memset_lambda ); + cudaErrchk( cudaGetLastError() ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_CUDA; + + } else if ( vid == RAJA_CUDA ) { + + MEMSET_DATA_SETUP_CUDA; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall< RAJA::cuda_exec >( + RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_CUDA; + + } else { + + getCout() << "\n MEMSET : Unknown Cuda variant id = " << vid << std::endl; + + } + +} + +void MEMSET::runCudaVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_CUDA || vid == RAJA_CUDA) { + + if (tune_idx == t) { + + runCudaVariantLibrary(vid); + + } + + t += 1; + + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + 
run_params.validGPUBlockSize(block_size)) { + + if (tune_idx == t) { + + runCudaVariantBlock(vid); + + } + + t += 1; + + } + + }); +} + +void MEMSET::setCudaTuningDefinitions(VariantID vid) +{ + if (vid == Base_CUDA || vid == RAJA_CUDA) { + addVariantTuningName(vid, "library"); + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + addVariantTuningName(vid, "block_"+std::to_string(block_size)); + + } + + }); +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // RAJA_ENABLE_CUDA diff --git a/src/algorithm/MEMSET-Hip.cpp b/src/algorithm/MEMSET-Hip.cpp new file mode 100644 index 000000000..c7db620d0 --- /dev/null +++ b/src/algorithm/MEMSET-Hip.cpp @@ -0,0 +1,220 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_HIP) + +#include "common/HipDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + +#define MEMSET_DATA_SETUP_HIP \ + allocAndInitHipDeviceData(x, m_x, iend); + +#define MEMSET_DATA_TEARDOWN_HIP \ + getHipDeviceData(m_x, x, iend); \ + deallocHipDeviceData(x); + +template < size_t block_size > +__launch_bounds__(block_size) +__global__ void memset(Real_ptr x, Real_type val, + Index_type iend) +{ + Index_type i = blockIdx.x * block_size + threadIdx.x; + if ( i < iend ) { + MEMSET_BODY; + } +} + + +void MEMSET::runHipVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + if ( vid == Base_HIP ) { + + MEMSET_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + hipErrchk( hipMemsetAsync(MEMSET_STD_ARGS, 0) ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_HIP; + + } else if ( vid == RAJA_HIP ) { + + MEMSET_DATA_SETUP_HIP; + + camp::resources::Hip res = camp::resources::Hip::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memset(MEMSET_STD_ARGS); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_HIP; + + } else { + + getCout() << "\n MEMSET : Unknown Hip variant id = " << vid << std::endl; + + } + +} + +template < size_t block_size > +void MEMSET::runHipVariantBlock(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + if ( vid == Base_HIP ) { + + MEMSET_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); 
+ hipLaunchKernelGGL( (memset), + dim3(grid_size), dim3(block_size), 0, 0, + x, val, iend ); + hipErrchk( hipGetLastError() ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_HIP; + + } else if ( vid == Lambda_HIP ) { + + MEMSET_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + auto memset_lambda = [=] __device__ (Index_type i) { + MEMSET_BODY; + }; + + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + hipLaunchKernelGGL((lambda_hip_forall), + grid_size, block_size, 0, 0, + ibegin, iend, memset_lambda); + hipErrchk( hipGetLastError() ); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_HIP; + + } else if ( vid == RAJA_HIP ) { + + MEMSET_DATA_SETUP_HIP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall< RAJA::hip_exec >( + RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_HIP; + + } else { + + getCout() << "\n MEMSET : Unknown Hip variant id = " << vid << std::endl; + + } + +} + +void MEMSET::runHipVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_HIP || vid == RAJA_HIP) { + + if (tune_idx == t) { + + runHipVariantLibrary(vid); + + } + + t += 1; + + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + if (tune_idx == t) { + + runHipVariantBlock(vid); + + } + + t += 1; + + } + + }); + +} + +void MEMSET::setHipTuningDefinitions(VariantID vid) +{ + if (vid == Base_HIP || vid == RAJA_HIP) { + addVariantTuningName(vid, "library"); + } + + seq_for(gpu_block_sizes_type{}, [&](auto block_size) { + + if (run_params.numValidGPUBlockSize() == 0u || + run_params.validGPUBlockSize(block_size)) { + + addVariantTuningName(vid, "block_"+std::to_string(block_size)); + + } + + }); + +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // 
RAJA_ENABLE_HIP diff --git a/src/algorithm/MEMSET-OMP.cpp b/src/algorithm/MEMSET-OMP.cpp new file mode 100644 index 000000000..ebd931e4d --- /dev/null +++ b/src/algorithm/MEMSET-OMP.cpp @@ -0,0 +1,98 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include <iostream> + +namespace rajaperf +{ +namespace algorithm +{ + +// Runs the Base, Lambda and RAJA OpenMP variants of MEMSET, timing run_reps repetitions; tune_idx is unused. +void MEMSET::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_OpenMP : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp parallel for + for (Index_type i = ibegin; i < iend; ++i ) { + MEMSET_BODY; + } + + } + stopTimer(); + + break; + } + + case Lambda_OpenMP : { + + auto memset_lambda = [=](Index_type i) { + MEMSET_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp parallel for + for (Index_type i = ibegin; i < iend; ++i ) { + memset_lambda(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_OpenMP : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall<RAJA::omp_parallel_for_exec>( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } + +#else + RAJA_UNUSED_VAR(vid); // OpenMP variants not compiled in: nothing is run, vid is only marked as used +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git 
a/src/algorithm/MEMSET-OMPTarget.cpp b/src/algorithm/MEMSET-OMPTarget.cpp new file mode 100644 index 000000000..b60454c8c --- /dev/null +++ b/src/algorithm/MEMSET-OMPTarget.cpp @@ -0,0 +1,94 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_TARGET_OPENMP) + +#include "common/OpenMPTargetDataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + // + // Define threads per team for target execution + // + const size_t threads_per_team = 256; + +#define MEMSET_DATA_SETUP_OMP_TARGET \ + int hid = omp_get_initial_device(); \ + int did = omp_get_default_device(); \ +\ + allocAndInitOpenMPDeviceData(x, m_x, iend, did, hid); + +#define MEMSET_DATA_TEARDOWN_OMP_TARGET \ + getOpenMPDeviceData(m_x, x, iend, hid, did); \ + deallocOpenMPDeviceData(x, did); \ + + +void MEMSET::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + if ( vid == Base_OpenMPTarget ) { + + MEMSET_DATA_SETUP_OMP_TARGET + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + #pragma omp target is_device_ptr(x) device( did ) + #pragma omp teams distribute parallel for \ + thread_limit(threads_per_team) schedule(static, 1) + for (Index_type i = ibegin; i < iend; ++i ) { + MEMSET_BODY; + } + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_OMP_TARGET + + } else if ( vid == RAJA_OpenMPTarget ) { + + MEMSET_DATA_SETUP_OMP_TARGET + + startTimer(); + for (RepIndex_type irep = 0; irep < 
run_reps; ++irep) { + + RAJA::forall>( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + MEMSET_DATA_TEARDOWN_OMP_TARGET + + } else { + getCout() << "\n MEMSET : Unknown OMP Target variant id = " << vid << std::endl; + } + +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // RAJA_ENABLE_TARGET_OPENMP diff --git a/src/algorithm/MEMSET-Seq.cpp b/src/algorithm/MEMSET-Seq.cpp new file mode 100644 index 000000000..0ff80698a --- /dev/null +++ b/src/algorithm/MEMSET-Seq.cpp @@ -0,0 +1,175 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMSET::runSeqVariantLibrary(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::memset(MEMSET_STD_ARGS); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case RAJA_Seq : { + + camp::resources::Host res = camp::resources::Host::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memset(MEMSET_STD_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } + +} + +void MEMSET::runSeqVariantDefault(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = 
getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + MEMSET_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto memset_lambda = [=](Index_type i) { + MEMSET_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + memset_lambda(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } + +} + +void MEMSET::runSeqVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_Seq || vid == RAJA_Seq) { + + if (tune_idx == t) { + + runSeqVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runSeqVariantDefault(vid); + + } + + t += 1; +} + +void MEMSET::setSeqTuningDefinitions(VariantID vid) +{ + if (vid == Base_Seq || vid == RAJA_Seq) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp new file mode 100644 index 000000000..98fc2e4db --- /dev/null +++ b/src/algorithm/MEMSET.cpp @@ -0,0 +1,79 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/DataUtils.hpp" + +namespace rajaperf +{ +namespace algorithm +{ + +// Constructor: registers default sizes, per-rep cost metrics and the variants this kernel defines. +MEMSET::MEMSET(const RunParams& params) + : KernelBase(rajaperf::Algorithm_MEMSET, params) +{ + setDefaultProblemSize(1000000); + setDefaultReps(100); + + setActualProblemSize( getTargetProblemSize() ); // set before the metrics below, which query getActualProblemSize() + + setItsPerRep( getActualProblemSize() ); + setKernelsPerRep(1); + setBytesPerRep( (0*sizeof(Real_type) + 1*sizeof(Real_type)) + // one Real_type term that is not scaled by problem size + (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getActualProblemSize() ); // plus one Real_type per element + setFLOPsPerRep(0); // no floating-point operations are counted + + setUsesFeature(Forall); + + setVariantDefined( Base_Seq ); + setVariantDefined( Lambda_Seq ); + setVariantDefined( RAJA_Seq ); + + setVariantDefined( Base_OpenMP ); + setVariantDefined( Lambda_OpenMP ); + setVariantDefined( RAJA_OpenMP ); + + setVariantDefined( Base_OpenMPTarget ); + setVariantDefined( RAJA_OpenMPTarget ); + + setVariantDefined( Base_CUDA ); + setVariantDefined( Lambda_CUDA ); + setVariantDefined( RAJA_CUDA ); + + setVariantDefined( Base_HIP ); + setVariantDefined( Lambda_HIP ); + setVariantDefined( RAJA_HIP ); +} + +MEMSET::~MEMSET() +{ +} + +void MEMSET::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + allocAndInitDataConst(m_x, getActualProblemSize(), -1.234567e89, vid); // m_x is given a constant that differs from m_val + m_val = 0.0; // the value assigned to val by MEMSET_DATA_SETUP +} + +void MEMSET::updateChecksum(VariantID vid, size_t tune_idx) +{ + checksum[vid].at(tune_idx) += calcChecksum(m_x, getActualProblemSize()); // accumulated over m_x +} + +void MEMSET::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + (void) vid; // vid is not otherwise used in this function + deallocData(m_x); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp new file mode 100644 index 000000000..ebf2f867b --- /dev/null +++ b/src/algorithm/MEMSET.hpp @@ -0,0 +1,83 @@ 
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// MEMSET kernel reference implementation: +/// +/// for (Index_type i = ibegin; i < iend; ++i ) { +/// x[i] = val ; +/// } +/// + +#ifndef RAJAPerf_Algorithm_MEMSET_HPP +#define RAJAPerf_Algorithm_MEMSET_HPP + +#define MEMSET_DATA_SETUP \ + Real_ptr x = m_x; \ + Real_type val = m_val; + +#define MEMSET_STD_ARGS \ + x + ibegin, (int)val, (iend-ibegin)*sizeof(Real_type) + +#define MEMSET_BODY \ + x[i] = val; + + +#include "common/KernelBase.hpp" + +namespace rajaperf +{ +class RunParams; + +namespace algorithm +{ + +class MEMSET : public KernelBase +{ +public: + + MEMSET(const RunParams& params); + + ~MEMSET(); + + void setUp(VariantID vid, size_t tune_idx); + void updateChecksum(VariantID vid, size_t tune_idx); + void tearDown(VariantID vid, size_t tune_idx); + + void runSeqVariant(VariantID vid, size_t tune_idx); + void runOpenMPVariant(VariantID vid, size_t tune_idx); + void runCudaVariant(VariantID vid, size_t tune_idx); + void runHipVariant(VariantID vid, size_t tune_idx); + void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + + void setSeqTuningDefinitions(VariantID vid); + void setCudaTuningDefinitions(VariantID vid); + void setHipTuningDefinitions(VariantID vid); + void runSeqVariantDefault(VariantID vid); + void runSeqVariantLibrary(VariantID vid); + + template < size_t block_size > + void runCudaVariantBlock(VariantID vid); + void runCudaVariantLibrary(VariantID vid); + + template < size_t block_size > + void runHipVariantBlock(VariantID vid); + void runHipVariantLibrary(VariantID vid); + +private: + static const size_t default_gpu_block_size = 256; + using 
gpu_block_sizes_type = gpu_block_size::make_list_type; + + Real_ptr m_x; + Real_type m_val; +}; + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/algorithm/REDUCE_SUM-Cuda.cpp b/src/algorithm/REDUCE_SUM-Cuda.cpp index c8b4bb8e4..66b0fb84d 100644 --- a/src/algorithm/REDUCE_SUM-Cuda.cpp +++ b/src/algorithm/REDUCE_SUM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/REDUCE_SUM-Hip.cpp b/src/algorithm/REDUCE_SUM-Hip.cpp index 691db7fae..09f9c7a30 100644 --- a/src/algorithm/REDUCE_SUM-Hip.cpp +++ b/src/algorithm/REDUCE_SUM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/REDUCE_SUM-OMP.cpp b/src/algorithm/REDUCE_SUM-OMP.cpp index 8f80b5633..49d0d766e 100644 --- a/src/algorithm/REDUCE_SUM-OMP.cpp +++ b/src/algorithm/REDUCE_SUM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/algorithm/REDUCE_SUM-OMPTarget.cpp b/src/algorithm/REDUCE_SUM-OMPTarget.cpp index b3bf8ac05..eff2bee26 100644 --- a/src/algorithm/REDUCE_SUM-OMPTarget.cpp +++ b/src/algorithm/REDUCE_SUM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/REDUCE_SUM-Seq.cpp b/src/algorithm/REDUCE_SUM-Seq.cpp index d4fc7cddf..9c7336c2d 100644 --- a/src/algorithm/REDUCE_SUM-Seq.cpp +++ b/src/algorithm/REDUCE_SUM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp index f85b982f6..8ccd2bbab 100644 --- a/src/algorithm/REDUCE_SUM.cpp +++ b/src/algorithm/REDUCE_SUM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/algorithm/REDUCE_SUM.hpp b/src/algorithm/REDUCE_SUM.hpp index f6dba52db..ba9e9308b 100644 --- a/src/algorithm/REDUCE_SUM.hpp +++ b/src/algorithm/REDUCE_SUM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SCAN-Cuda.cpp b/src/algorithm/SCAN-Cuda.cpp index 0f9612c23..34a9c1f1e 100644 --- a/src/algorithm/SCAN-Cuda.cpp +++ b/src/algorithm/SCAN-Cuda.cpp @@ -106,7 +106,7 @@ void SCAN::runCudaVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) SCAN_DATA_TEARDOWN_CUDA; } else { - std::cout << "\n SCAN : Unknown Cuda variant id = " << vid << std::endl; + getCout() << "\n SCAN : Unknown Cuda variant id = " << vid << std::endl; } } diff --git a/src/algorithm/SCAN-Hip.cpp b/src/algorithm/SCAN-Hip.cpp index 6ddccb115..14a0f647b 100644 --- a/src/algorithm/SCAN-Hip.cpp +++ b/src/algorithm/SCAN-Hip.cpp @@ -133,7 +133,7 @@ void SCAN::runHipVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) SCAN_DATA_TEARDOWN_HIP; } else { - std::cout << "\n SCAN : Unknown Hip variant id = " << vid << std::endl; + getCout() << "\n SCAN : Unknown Hip variant id = " << vid << std::endl; } } diff --git a/src/algorithm/SCAN-OMP.cpp b/src/algorithm/SCAN-OMP.cpp index 3d21e1e0b..920a4b8e6 100644 --- a/src/algorithm/SCAN-OMP.cpp +++ b/src/algorithm/SCAN-OMP.cpp @@ -175,7 +175,7 @@ void SCAN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } default : { - std::cout << "\n SCAN : Unknown variant id = " << vid << std::endl; + getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; } } diff --git a/src/algorithm/SCAN-OMPTarget.cpp b/src/algorithm/SCAN-OMPTarget.cpp index 16a1fc1fc..f7f18588a 100644 --- 
a/src/algorithm/SCAN-OMPTarget.cpp +++ b/src/algorithm/SCAN-OMPTarget.cpp @@ -10,6 +10,10 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_TARGET_OPENMP) + +#include "common/OpenMPTargetDataUtils.hpp" + #include #include @@ -18,9 +22,6 @@ namespace rajaperf namespace algorithm { -#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) \ - && _OPENMP >= 201811 && defined(RAJA_PERFSUITE_ENABLE_OPENMP5_SCAN) - // // Define threads per team for target execution // @@ -38,13 +39,10 @@ namespace algorithm deallocOpenMPDeviceData(x, did); \ deallocOpenMPDeviceData(y, did); -#endif - void SCAN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { -#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) \ - && _OPENMP >= 201811 && defined(RAJA_PERFSUITE_ENABLE_OPENMP5_SCAN) +#if _OPENMP >= 201811 && defined(RAJA_PERFSUITE_ENABLE_OPENMP5_SCAN) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; @@ -56,6 +54,8 @@ void SCAN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune case Base_OpenMPTarget : { + SCAN_DATA_SETUP_OMP_TARGET; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -73,9 +73,13 @@ void SCAN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } stopTimer(); + SCAN_DATA_TEARDOWN_OMP_TARGET; + break; } + } + #else RAJA_UNUSED_VAR(vid); #endif @@ -83,3 +87,5 @@ void SCAN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_TARGET_OPENMP diff --git a/src/algorithm/SCAN-Seq.cpp b/src/algorithm/SCAN-Seq.cpp index b658ca41d..c1765755c 100644 --- a/src/algorithm/SCAN-Seq.cpp +++ b/src/algorithm/SCAN-Seq.cpp @@ -79,7 +79,7 @@ void SCAN::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) #endif default : { - std::cout << "\n SCAN : Unknown variant id = " << vid << std::endl; + getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; } } diff 
--git a/src/algorithm/SORT-Cuda.cpp b/src/algorithm/SORT-Cuda.cpp index 599a9f246..33a2afd59 100644 --- a/src/algorithm/SORT-Cuda.cpp +++ b/src/algorithm/SORT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORT-Hip.cpp b/src/algorithm/SORT-Hip.cpp index 6c80fbc97..7133dc13a 100644 --- a/src/algorithm/SORT-Hip.cpp +++ b/src/algorithm/SORT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORT-OMP.cpp b/src/algorithm/SORT-OMP.cpp index f62a7dbf6..05b885d50 100644 --- a/src/algorithm/SORT-OMP.cpp +++ b/src/algorithm/SORT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORT-Seq.cpp b/src/algorithm/SORT-Seq.cpp index ebba50994..85e353f5a 100644 --- a/src/algorithm/SORT-Seq.cpp +++ b/src/algorithm/SORT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index b9722c4d7..ce059ad75 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index 0670c9dd0..b51bf12f9 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORTPAIRS-Cuda.cpp b/src/algorithm/SORTPAIRS-Cuda.cpp index 0c09bfe1f..c7d30128c 100644 --- a/src/algorithm/SORTPAIRS-Cuda.cpp +++ b/src/algorithm/SORTPAIRS-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/algorithm/SORTPAIRS-Hip.cpp b/src/algorithm/SORTPAIRS-Hip.cpp index fbdbc660d..ed9a2dc41 100644 --- a/src/algorithm/SORTPAIRS-Hip.cpp +++ b/src/algorithm/SORTPAIRS-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORTPAIRS-OMP.cpp b/src/algorithm/SORTPAIRS-OMP.cpp index 5fabe18da..39705af9a 100644 --- a/src/algorithm/SORTPAIRS-OMP.cpp +++ b/src/algorithm/SORTPAIRS-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORTPAIRS-Seq.cpp b/src/algorithm/SORTPAIRS-Seq.cpp index 64fee5d1e..551410c29 100644 --- a/src/algorithm/SORTPAIRS-Seq.cpp +++ b/src/algorithm/SORTPAIRS-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index df175844e..65ef046ff 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp index 658d3ad4b..4cfc3eb36 100644 --- a/src/algorithm/SORTPAIRS.hpp +++ b/src/algorithm/SORTPAIRS.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/AppsData.cpp b/src/apps/AppsData.cpp index 390412aa3..9bb06b261 100644 --- a/src/apps/AppsData.cpp +++ b/src/apps/AppsData.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/AppsData.hpp b/src/apps/AppsData.hpp index f9b9251e9..afbfe98e5 100644 --- a/src/apps/AppsData.hpp +++ b/src/apps/AppsData.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index cf1ed84ea..6d521d1df 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # @@ -9,6 +9,12 @@ blt_add_library( NAME apps SOURCES AppsData.cpp + CONVECTION3DPA.cpp + CONVECTION3DPA-Cuda.cpp + CONVECTION3DPA-Hip.cpp + CONVECTION3DPA-Seq.cpp + CONVECTION3DPA-OMP.cpp + CONVECTION3DPA-OMPTarget.cpp DEL_DOT_VEC_2D.cpp DEL_DOT_VEC_2D-Seq.cpp DEL_DOT_VEC_2D-Hip.cpp diff --git a/src/apps/CONVECTION3DPA-Cuda.cpp b/src/apps/CONVECTION3DPA-Cuda.cpp new file mode 100644 index 000000000..44d870e9e --- /dev/null +++ b/src/apps/CONVECTION3DPA-Cuda.cpp @@ -0,0 +1,379 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_CUDA) + +#include "common/CudaDataUtils.hpp" + +#include + +namespace rajaperf { +namespace apps { + +#define CONVECTION3DPA_DATA_SETUP_CUDA \ + allocAndInitCudaDeviceData(Basis, m_B, CPA_Q1D *CPA_D1D); \ + allocAndInitCudaDeviceData(tBasis, m_Bt, CPA_Q1D *CPA_D1D); \ + allocAndInitCudaDeviceData(dBasis, m_G, CPA_Q1D *CPA_D1D); \ + allocAndInitCudaDeviceData(D, m_D, CPA_Q1D *CPA_Q1D *CPA_Q1D *CPA_VDIM *m_NE); \ + allocAndInitCudaDeviceData(X, m_X, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); \ + allocAndInitCudaDeviceData(Y, m_Y, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); + +#define CONVECTION3DPA_DATA_TEARDOWN_CUDA \ + getCudaDeviceData(m_Y, Y, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); \ + deallocCudaDeviceData(Basis); \ + deallocCudaDeviceData(tBasis); \ + deallocCudaDeviceData(dBasis); \ + deallocCudaDeviceData(D); \ + deallocCudaDeviceData(X); \ + deallocCudaDeviceData(Y); + +template < size_t block_size > + __launch_bounds__(block_size) +__global__ void Convection3DPA(const Real_ptr Basis, const Real_ptr tBasis, + const Real_ptr dBasis, const Real_ptr D, + const Real_ptr X, Real_ptr Y) { + + const int e = blockIdx.x; + + CONVECTION3DPA_0_GPU; + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(qz,z,CPA_Q1D) + { + 
CONVECTION3DPA_4; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(qz,z,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + +} + +template < size_t block_size > +void CONVECTION3DPA::runCudaVariantImpl(VariantID vid) { + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_CUDA: { + + CONVECTION3DPA_DATA_SETUP_CUDA; + + dim3 nthreads_per_block(CPA_Q1D, CPA_Q1D, CPA_Q1D); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Convection3DPA<<>> + (Basis, tBasis, dBasis, D, X, Y); + + cudaErrchk(cudaGetLastError()); + } + stopTimer(); + + CONVECTION3DPA_DATA_TEARDOWN_CUDA; + + break; + } + + case RAJA_CUDA: { + + CONVECTION3DPA_DATA_SETUP_CUDA; + + constexpr bool async = true; + + using launch_policy = + RAJA::LaunchPolicy>; + + using outer_x = + RAJA::LoopPolicy; + + using inner_x = + RAJA::LoopPolicy; + + using inner_y = + RAJA::LoopPolicy; + + using inner_z = + RAJA::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(CPA_Q1D, CPA_Q1D, CPA_Q1D)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_GPU; + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + 
[&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (dx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + 
ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (qx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + } // lambda (e) + ); // RAJA::loop + + } // outer lambda (ctx) + ); // RAJA::launch + + } // loop over kernel reps + stopTimer(); + + CONVECTION3DPA_DATA_TEARDOWN_CUDA; + + break; + } + + default: { + + getCout() << "\n CONVECTION3DPA : Unknown Cuda variant id = " << vid + << std::endl; + break; + } + } +} + +RAJAPERF_GPU_BLOCK_SIZE_TUNING_DEFINE_BIOLERPLATE(CONVECTION3DPA, Cuda) + +} // end namespace apps +} // end namespace rajaperf + +#endif // RAJA_ENABLE_CUDA diff --git a/src/apps/CONVECTION3DPA-Hip.cpp b/src/apps/CONVECTION3DPA-Hip.cpp new file mode 100644 index 000000000..3d2cca4c8 --- /dev/null +++ b/src/apps/CONVECTION3DPA-Hip.cpp @@ -0,0 +1,381 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_HIP) + +#include "common/HipDataUtils.hpp" + +#include + +namespace rajaperf { +namespace apps { + +#define CONVECTION3DPA_DATA_SETUP_HIP \ + allocAndInitHipDeviceData(Basis, m_B, CPA_Q1D *CPA_D1D); \ + allocAndInitHipDeviceData(tBasis, m_Bt, CPA_Q1D *CPA_D1D); \ + allocAndInitHipDeviceData(dBasis, m_G, CPA_Q1D *CPA_D1D); \ + allocAndInitHipDeviceData(D, m_D, CPA_Q1D *CPA_Q1D *CPA_Q1D *CPA_VDIM *m_NE); \ + allocAndInitHipDeviceData(X, m_X, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); \ + allocAndInitHipDeviceData(Y, m_Y, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); + +#define CONVECTION3DPA_DATA_TEARDOWN_HIP \ + getHipDeviceData(m_Y, Y, CPA_D1D *CPA_D1D *CPA_D1D *m_NE); \ + deallocHipDeviceData(Basis); \ + deallocHipDeviceData(tBasis); \ + deallocHipDeviceData(dBasis); \ + deallocHipDeviceData(D); \ + deallocHipDeviceData(X); \ + deallocHipDeviceData(Y); + +template < size_t block_size > + __launch_bounds__(block_size) +__global__ void Convection3DPA(const Real_ptr Basis, const Real_ptr tBasis, + const Real_ptr dBasis, const Real_ptr D, + const Real_ptr X, Real_ptr Y) { + + const int e = blockIdx.x; + + CONVECTION3DPA_0_GPU; + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(qz,z,CPA_Q1D) + { + CONVECTION3DPA_4; + } + } + } 
+ __syncthreads(); + + GPU_FOREACH_THREAD(qz,z,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(qy,y,CPA_Q1D) + { + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(qx,x,CPA_Q1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + __syncthreads(); + + GPU_FOREACH_THREAD(dz,z,CPA_D1D) + { + GPU_FOREACH_THREAD(dy,y,CPA_D1D) + { + GPU_FOREACH_THREAD(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + +} + +template < size_t block_size > +void CONVECTION3DPA::runHipVariantImpl(VariantID vid) { + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_HIP: { + + CONVECTION3DPA_DATA_SETUP_HIP; + + dim3 nblocks(NE); + dim3 nthreads_per_block(CPA_Q1D, CPA_Q1D, CPA_Q1D); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + hipLaunchKernelGGL((Convection3DPA), + dim3(nblocks), dim3(nthreads_per_block), 0, 0, + Basis, tBasis, dBasis, D, X, Y); + + hipErrchk(hipGetLastError()); + } + stopTimer(); + + CONVECTION3DPA_DATA_TEARDOWN_HIP; + + break; + } + + case RAJA_HIP: { + + CONVECTION3DPA_DATA_SETUP_HIP; + + constexpr bool async = true; + + using launch_policy = + RAJA::LaunchPolicy>; + + using outer_x = + RAJA::LoopPolicy; + + using inner_x = + RAJA::LoopPolicy; + + using inner_y = + RAJA::LoopPolicy; + + using inner_z = + RAJA::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(CPA_Q1D, CPA_Q1D, CPA_Q1D)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_GPU; + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + 
RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (dx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::loop + } // lambda (qy) + ); 
//RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (qx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + } // lambda (e) + ); // RAJA::loop + + } // outer lambda (ctx) + ); // RAJA::launch + + } // loop over kernel reps + stopTimer(); + + CONVECTION3DPA_DATA_TEARDOWN_HIP; + + break; + } + + default: { + + getCout() << "\n CONVECTION3DPA : Unknown Hip variant id = " << vid + << std::endl; + break; + } + } +} + +RAJAPERF_GPU_BLOCK_SIZE_TUNING_DEFINE_BIOLERPLATE(CONVECTION3DPA, Hip) + +} // end namespace apps +} // end namespace rajaperf + +#endif // RAJA_ENABLE_HIP diff --git a/src/apps/CONVECTION3DPA-OMP.cpp b/src/apps/CONVECTION3DPA-OMP.cpp new file mode 100644 index 000000000..237ebe87c --- /dev/null +++ b/src/apps/CONVECTION3DPA-OMP.cpp @@ -0,0 +1,321 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + +#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) + + const Index_type run_reps = getRunReps(); + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_OpenMP: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#pragma omp parallel for + for (int e = 0; e < NE; ++e) { + + CONVECTION3DPA_0_CPU; + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qz,z,CPA_Q1D) + { + CONVECTION3DPA_4; + } + } + } + + CPU_FOREACH(qz,z,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + + } // element loop + } + stopTimer(); + + break; + } + + case RAJA_OpenMP: { + + using launch_policy = RAJA::LaunchPolicy; + + using outer_x = RAJA::LoopPolicy; + + using inner_x = RAJA::LoopPolicy; + + using inner_y = RAJA::LoopPolicy; + + using inner_z = RAJA::LoopPolicy; + 
+ startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_CPU; + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (dx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + 
); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (qx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + } // lambda (e) + ); // RAJA::loop + + } // outer lambda (ctx) + ); // RAJA::launch + } // loop over kernel reps + stopTimer(); + + return; + } + + default: + getCout() << "\n CONVECTION3DPA : Unknown OpenMP variant id = " << vid + << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/CONVECTION3DPA-OMPTarget.cpp b/src/apps/CONVECTION3DPA-OMPTarget.cpp new file mode 100644 index 000000000..92ee3d987 --- /dev/null +++ b/src/apps/CONVECTION3DPA-OMPTarget.cpp @@ -0,0 +1,39 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_TARGET_OPENMP) + +#include "common/OpenMPTargetDataUtils.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + + switch (vid) { + + default: { + + getCout() << "\n CONVECTION3DPA : Unknown OpenMPTarget variant id = " << vid + << std::endl; + break; + } + } +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // RAJA_ENABLE_TARGET_OPENMP diff --git a/src/apps/CONVECTION3DPA-Seq.cpp b/src/apps/CONVECTION3DPA-Seq.cpp new file mode 100644 index 000000000..b3b644821 --- /dev/null +++ b/src/apps/CONVECTION3DPA-Seq.cpp @@ -0,0 +1,316 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_Seq: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (int e = 0; e < NE; ++e) { + + CONVECTION3DPA_0_CPU; + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qz,z,CPA_Q1D) + { + CONVECTION3DPA_4; + } + } + } + + CPU_FOREACH(qz,z,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + } // element loop + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case RAJA_Seq: { + + using launch_policy = RAJA::LaunchPolicy; + + using outer_x = RAJA::LoopPolicy; + + using inner_x = RAJA::LoopPolicy; + + using inner_y = RAJA::LoopPolicy; + + using inner_z = RAJA::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; 
++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_CPU; + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (dx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // 
lambda (qz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::loop + } // lambda (qy) + ); //RAJA::loop + } // lambda (qx) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::loop + } // lambda (qx) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + ctx.teamSync(); + + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::loop + } // lambda (dy) + ); //RAJA::loop + } // lambda (dz) + ); //RAJA::loop + + } // lambda (e) + ); // RAJA::loop + + } // outer lambda (ctx) + ); // RAJA::launch + } // loop over kernel reps + stopTimer(); + + return; + } +#endif // RUN_RAJA_SEQ + + default: + getCout() << "\n CONVECTION3DPA : Unknown Seq variant id = " << vid + << std::endl; + } +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp new file mode 100644 index 000000000..83010024b --- /dev/null +++ b/src/apps/CONVECTION3DPA.cpp @@ -0,0 +1,102 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/DataUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) + : KernelBase(rajaperf::Apps_CONVECTION3DPA, params) +{ + m_NE_default = 15625; + + setDefaultProblemSize(m_NE_default*CPA_Q1D*CPA_Q1D*CPA_Q1D); + setDefaultReps(50); + + m_NE = std::max(getTargetProblemSize()/(CPA_Q1D*CPA_Q1D*CPA_Q1D), Index_type(1)); + + setActualProblemSize( m_NE*CPA_Q1D*CPA_Q1D*CPA_Q1D ); + + setItsPerRep(getActualProblemSize()); + setKernelsPerRep(1); + + setBytesPerRep( 3*CPA_Q1D*CPA_D1D*sizeof(Real_type) + + CPA_VDIM*CPA_Q1D*CPA_Q1D*CPA_Q1D*m_NE*sizeof(Real_type) + + CPA_D1D*CPA_D1D*CPA_D1D*m_NE*sizeof(Real_type) + + CPA_D1D*CPA_D1D*CPA_D1D*m_NE*sizeof(Real_type) ); + + setFLOPsPerRep(m_NE * ( + 4 * CPA_D1D * CPA_Q1D * CPA_D1D * CPA_D1D + //2 + 6 * CPA_D1D * CPA_Q1D * CPA_Q1D * CPA_D1D + //3 + 6 * CPA_D1D * CPA_Q1D * CPA_Q1D * CPA_Q1D + //4 + 5 * CPA_Q1D * CPA_Q1D * CPA_Q1D + // 5 + 2 * CPA_Q1D * CPA_D1D * CPA_Q1D * CPA_Q1D + // 6 + 2 * CPA_Q1D * CPA_D1D * CPA_Q1D * CPA_D1D + // 7 + (1 + 2*CPA_Q1D) * CPA_D1D * CPA_D1D * CPA_D1D // 8 + )); + + setUsesFeature(Teams); + + setVariantDefined( Base_Seq ); + setVariantDefined( RAJA_Seq ); + + setVariantDefined( Base_OpenMP ); + setVariantDefined( RAJA_OpenMP ); + + setVariantDefined( Base_CUDA ); + setVariantDefined( RAJA_CUDA ); + + setVariantDefined( Base_HIP ); + setVariantDefined( RAJA_HIP ); + +} + +CONVECTION3DPA::~CONVECTION3DPA() +{ +} + +void CONVECTION3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + + allocAndInitDataConst(m_B, int(CPA_Q1D*CPA_D1D), Real_type(1.0), vid); + allocAndInitDataConst(m_Bt, int(CPA_Q1D*CPA_D1D), Real_type(1.0), vid); + allocAndInitDataConst(m_G, int(CPA_Q1D*CPA_D1D), Real_type(1.0), vid); + 
allocAndInitDataConst(m_D, int(CPA_Q1D*CPA_Q1D*CPA_Q1D*CPA_VDIM*m_NE), Real_type(1.0), vid); + allocAndInitDataConst(m_X, int(CPA_D1D*CPA_D1D*CPA_D1D*m_NE), Real_type(1.0), vid); + allocAndInitDataConst(m_Y, int(CPA_D1D*CPA_D1D*CPA_D1D*m_NE), Real_type(0.0), vid); +} + +void CONVECTION3DPA::updateChecksum(VariantID vid, size_t tune_idx) +{ + checksum[vid][tune_idx] += calcChecksum(m_Y, CPA_D1D*CPA_D1D*CPA_D1D*m_NE); +} + +void CONVECTION3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + (void) vid; + + deallocData(m_B); + deallocData(m_Bt); + deallocData(m_G); + deallocData(m_D); + deallocData(m_X); + deallocData(m_Y); +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/CONVECTION3DPA.hpp b/src/apps/CONVECTION3DPA.hpp new file mode 100644 index 000000000..784b2d4cd --- /dev/null +++ b/src/apps/CONVECTION3DPA.hpp @@ -0,0 +1,408 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Action of 3D diffusion matrix via partial assembly +/// +/// Based on MFEM's/CEED algorithms. +/// Reference implementation +/// https://github.com/mfem/mfem/blob/master/fem/bilininteg_convection_pa.cpp +/// +/// +/// for(int e = 0; e < NE; ++e) { +/// +/// constexpr int max_D1D = CPA_D1D; +/// constexpr int max_Q1D = CPA_Q1D; +/// constexpr int max_DQ = (max_Q1D > max_D1D) ? 
max_Q1D : max_D1D; +/// MFEM_SHARED double sm0[max_DQ*max_DQ*max_DQ]; +/// MFEM_SHARED double sm1[max_DQ*max_DQ*max_DQ]; +/// MFEM_SHARED double sm2[max_DQ*max_DQ*max_DQ]; +/// MFEM_SHARED double sm3[max_DQ*max_DQ*max_DQ]; +/// MFEM_SHARED double sm4[max_DQ*max_DQ*max_DQ]; +/// MFEM_SHARED double sm5[max_DQ*max_DQ*max_DQ]; +/// +/// double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// for(int dy = 0; dy < CPA_D1D; ++dy) +/// { +/// for(int dx = 0; dx < CPA_D1D; ++dx) +/// { +/// u[dz][dy][dx] = cpaX_(dx,dy,dz,e); +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1; +/// double (*Gu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm2; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// for(int dy = 0; dy < CPA_D1D; ++dy) +/// { +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// double Bu_ = 0.0; +/// double Gu_ = 0.0; +/// for(int dx = 0; dx < CPA_D1D; ++dx) +/// { +/// const double bx = cpa_B(qx,dx); +/// const double gx = cpa_G(qx,dx); +/// const double x = u[dz][dy][dx]; +/// Bu_ += bx * x; +/// Gu_ += gx * x; +/// } +/// Bu[dz][dy][qx] = Bu_; +/// Gu[dz][dy][qx] = Gu_; +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; +/// double (*GBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; +/// double (*BGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm5; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// for(int qy = 0; qy < CPA_Q1D; ++qy) +/// { +/// double BBu_ = 0.0; +/// double GBu_ = 0.0; +/// double BGu_ = 0.0; +/// for(int dy = 0; dy < CPA_D1D; ++dy) +/// { +/// const double bx = cpa_B(qy,dy); +/// const double gx = cpa_G(qy,dy); +/// BBu_ += bx * Bu[dz][dy][qx]; +/// GBu_ += gx * Bu[dz][dy][qx]; +/// BGu_ += bx * Gu[dz][dy][qx]; +/// } +/// BBu[dz][qy][qx] = BBu_; +/// GBu[dz][qy][qx] = GBu_; 
+/// BGu[dz][qy][qx] = BGu_; +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*GBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0; +/// double (*BGBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1; +/// double (*BBGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm2; +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// for(int qy = 0; qy < CPA_Q1D; ++qy) +/// { +/// for(int qz = 0; qz < CPA_Q1D; ++qz) +/// { +/// double GBBu_ = 0.0; +/// double BGBu_ = 0.0; +/// double BBGu_ = 0.0; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// const double bx = cpa_B(qz,dz); +/// const double gx = cpa_G(qz,dz); +/// GBBu_ += gx * BBu[dz][qy][qx]; +/// BGBu_ += bx * GBu[dz][qy][qx]; +/// BBGu_ += bx * BGu[dz][qy][qx]; +/// } +/// GBBu[qz][qy][qx] = GBBu_; +/// BGBu[qz][qy][qx] = BGBu_; +/// BBGu[qz][qy][qx] = BBGu_; +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*DGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; +/// for(int qz = 0; qz < CPA_Q1D; ++qz) +/// { +/// for(int qy = 0; qy < CPA_Q1D; ++qy) +/// { +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// const double O1 = cpa_op(qx,qy,qz,0,e); +/// const double O2 = cpa_op(qx,qy,qz,1,e); +/// const double O3 = cpa_op(qx,qy,qz,2,e); +/// +/// const double gradX = BBGu[qz][qy][qx]; +/// const double gradY = BGBu[qz][qy][qx]; +/// const double gradZ = GBBu[qz][qy][qx]; +/// +/// DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ); +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*BDGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// for(int qy = 0; qy < CPA_Q1D; ++qy) +/// { +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// double BDGu_ = 0.0; +/// for(int qz = 0; qz < CPA_Q1D; ++qz) +/// { +/// const double w = cpa_Bt(dz,qz); +/// BDGu_ += w * DGu[qz][qy][qx]; +/// } +/// BDGu[dz][qy][qx] = BDGu_; +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// double (*BBDGu)[max_D1D][max_Q1D] = (double 
(*)[max_D1D][max_Q1D])sm5; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// for(int dy = 0; dy < CPA_D1D; ++dy) +/// { +/// double BBDGu_ = 0.0; +/// for(int qy = 0; qy < CPA_Q1D; ++qy) +/// { +/// const double w = cpa_Bt(dy,qy); +/// BBDGu_ += w * BDGu[dz][qy][qx]; +/// } +/// BBDGu[dz][dy][qx] = BBDGu_; +/// } +/// } +/// } +/// MFEM_SYNC_THREAD; +/// for(int dz = 0; dz < CPA_D1D; ++dz) +/// { +/// for(int dy = 0; dy < CPA_D1D; ++dy) +/// { +/// for(int dx = 0; dx < CPA_D1D; ++dx) +/// { +/// double BBBDGu = 0.0; +/// for(int qx = 0; qx < CPA_Q1D; ++qx) +/// { +/// const double w = cpa_Bt(dx,qx); +/// BBBDGu += w * BBDGu[dz][dy][qx]; +/// } +/// cpaY_(dx,dy,dz,e) += BBBDGu; +/// } +/// } +/// } +/// } // element loop +/// + +#ifndef RAJAPerf_Apps_CONVECTION3DPA_HPP +#define RAJAPerf_Apps_CONVECTION3DPA_HPP + +#define CONVECTION3DPA_DATA_SETUP \ +Real_ptr Basis = m_B; \ +Real_ptr tBasis = m_Bt; \ +Real_ptr dBasis = m_G; \ +Real_ptr D = m_D; \ +Real_ptr X = m_X; \ +Real_ptr Y = m_Y; \ +Index_type NE = m_NE; + +#include "common/KernelBase.hpp" +#include "FEM_MACROS.hpp" + +#include "RAJA/RAJA.hpp" + +//Number of Dofs/Qpts in 1D +#define CPA_D1D 3 +#define CPA_Q1D 4 +#define CPA_VDIM 3 +#define cpa_B(x, y) Basis[x + CPA_Q1D * y] +#define cpa_Bt(x, y) tBasis[x + CPA_D1D * y] +#define cpa_G(x, y) dBasis[x + CPA_Q1D * y] +#define cpaX_(dx, dy, dz, e) \ + X[dx + CPA_D1D * dy + CPA_D1D * CPA_D1D * dz + CPA_D1D * CPA_D1D * CPA_D1D * e] +#define cpaY_(dx, dy, dz, e) \ + Y[dx + CPA_D1D * dy + CPA_D1D * CPA_D1D * dz + CPA_D1D * CPA_D1D * CPA_D1D * e] +#define cpa_op(qx, qy, qz, d, e) \ + D[qx + CPA_Q1D * qy + CPA_Q1D * CPA_Q1D * qz + CPA_Q1D * CPA_Q1D * CPA_Q1D * d + CPA_VDIM * CPA_Q1D * CPA_Q1D * CPA_Q1D * e] + +#define CONVECTION3DPA_0_GPU \ + constexpr int max_D1D = CPA_D1D; \ + constexpr int max_Q1D = CPA_Q1D; \ + constexpr int max_DQ = (max_Q1D > max_D1D) ? 
max_Q1D : max_D1D; \ + RAJA_TEAM_SHARED double sm0[max_DQ*max_DQ*max_DQ]; \ + RAJA_TEAM_SHARED double sm1[max_DQ*max_DQ*max_DQ]; \ + RAJA_TEAM_SHARED double sm2[max_DQ*max_DQ*max_DQ]; \ + RAJA_TEAM_SHARED double sm3[max_DQ*max_DQ*max_DQ]; \ + RAJA_TEAM_SHARED double sm4[max_DQ*max_DQ*max_DQ]; \ + RAJA_TEAM_SHARED double sm5[max_DQ*max_DQ*max_DQ]; \ + double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0; \ + double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1; \ + double (*Gu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm2; \ + double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; \ + double (*GBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; \ + double (*BGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm5; \ + double (*GBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0; \ + double (*BGBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1; \ + double (*BBGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm2; \ + double (*DGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; \ + double (*BDGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; \ + double (*BBDGu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm5; + + +#define CONVECTION3DPA_0_CPU \ + constexpr int max_D1D = CPA_D1D; \ + constexpr int max_Q1D = CPA_Q1D; \ + constexpr int max_DQ = (max_Q1D > max_D1D) ? 
max_Q1D : max_D1D; \ + double sm0[max_DQ*max_DQ*max_DQ]; \ + double sm1[max_DQ*max_DQ*max_DQ]; \ + double sm2[max_DQ*max_DQ*max_DQ]; \ + double sm3[max_DQ*max_DQ*max_DQ]; \ + double sm4[max_DQ*max_DQ*max_DQ]; \ + double sm5[max_DQ*max_DQ*max_DQ]; \ + double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0; \ + double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1; \ + double (*Gu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm2; \ + double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; \ + double (*GBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; \ + double (*BGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm5; \ + double (*GBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0; \ + double (*BGBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1; \ + double (*BBGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm2; \ + double (*DGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3; \ + double (*BDGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4; \ + double (*BBDGu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm5; + +#define CONVECTION3DPA_1 \ + u[dz][dy][dx] = cpaX_(dx,dy,dz,e); + +#define CONVECTION3DPA_2 \ + double Bu_ = 0.0; \ + double Gu_ = 0.0; \ + for (int dx = 0; dx < CPA_D1D; ++dx) \ + { \ + const double bx = cpa_B(qx,dx); \ + const double gx = cpa_G(qx,dx); \ + const double x = u[dz][dy][dx]; \ + Bu_ += bx * x; \ + Gu_ += gx * x; \ + } \ + Bu[dz][dy][qx] = Bu_; \ + Gu[dz][dy][qx] = Gu_; + +#define CONVECTION3DPA_3 \ + double BBu_ = 0.0; \ + double GBu_ = 0.0; \ + double BGu_ = 0.0; \ + for (int dy = 0; dy < CPA_D1D; ++dy) \ + { \ + const double bx = cpa_B(qy,dy); \ + const double gx = cpa_G(qy,dy); \ + BBu_ += bx * Bu[dz][dy][qx]; \ + GBu_ += gx * Bu[dz][dy][qx]; \ + BGu_ += bx * Gu[dz][dy][qx]; \ + } \ + BBu[dz][qy][qx] = BBu_; \ + GBu[dz][qy][qx] = GBu_; \ + BGu[dz][qy][qx] = BGu_; + +#define CONVECTION3DPA_4 \ + double GBBu_ = 0.0; \ + double BGBu_ = 0.0; \ 
+ double BBGu_ = 0.0; \ + for (int dz = 0; dz < CPA_D1D; ++dz) \ + { \ + const double bx = cpa_B(qz,dz); \ + const double gx = cpa_G(qz,dz); \ + GBBu_ += gx * BBu[dz][qy][qx]; \ + BGBu_ += bx * GBu[dz][qy][qx]; \ + BBGu_ += bx * BGu[dz][qy][qx]; \ + } \ + GBBu[qz][qy][qx] = GBBu_; \ + BGBu[qz][qy][qx] = BGBu_; \ + BBGu[qz][qy][qx] = BBGu_; + +#define CONVECTION3DPA_5 \ + const double O1 = cpa_op(qx,qy,qz,0,e); \ + const double O2 = cpa_op(qx,qy,qz,1,e); \ + const double O3 = cpa_op(qx,qy,qz,2,e); \ + const double gradX = BBGu[qz][qy][qx]; \ + const double gradY = BGBu[qz][qy][qx]; \ + const double gradZ = GBBu[qz][qy][qx]; \ + DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ); + +#define CONVECTION3DPA_6 \ + double BDGu_ = 0.0; \ + for (int qz = 0; qz < CPA_Q1D; ++qz) \ + { \ + const double w = cpa_Bt(dz,qz); \ + BDGu_ += w * DGu[qz][qy][qx]; \ + } \ + BDGu[dz][qy][qx] = BDGu_; + +#define CONVECTION3DPA_7 \ + double BBDGu_ = 0.0; \ + for (int qy = 0; qy < CPA_Q1D; ++qy) \ + { \ + const double w = cpa_Bt(dy,qy); \ + BBDGu_ += w * BDGu[dz][qy][qx]; \ + } \ + BBDGu[dz][dy][qx] = BBDGu_; \ + +#define CONVECTION3DPA_8 \ + double BBBDGu = 0.0; \ + for (int qx = 0; qx < CPA_Q1D; ++qx) \ + { \ + const double w = cpa_Bt(dx,qx); \ + BBBDGu += w * BBDGu[dz][dy][qx]; \ + } \ + cpaY_(dx,dy,dz,e) += BBBDGu; + +namespace rajaperf +{ +class RunParams; + +namespace apps +{ + +class CONVECTION3DPA : public KernelBase +{ +public: + + CONVECTION3DPA(const RunParams& params); + + ~CONVECTION3DPA(); + + void setUp(VariantID vid, size_t tune_idx); + void updateChecksum(VariantID vid, size_t tune_idx); + void tearDown(VariantID vid, size_t tune_idx); + + void runSeqVariant(VariantID vid, size_t tune_idx); + void runOpenMPVariant(VariantID vid, size_t tune_idx); + void runCudaVariant(VariantID vid, size_t tune_idx); + void runHipVariant(VariantID vid, size_t tune_idx); + void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + + void setCudaTuningDefinitions(VariantID 
vid); + void setHipTuningDefinitions(VariantID vid); + template < size_t block_size > + void runCudaVariantImpl(VariantID vid); + template < size_t block_size > + void runHipVariantImpl(VariantID vid); + +private: + static const size_t default_gpu_block_size = CPA_Q1D * CPA_Q1D * CPA_Q1D; + using gpu_block_sizes_type = gpu_block_size::list_type; + + Real_ptr m_B; + Real_ptr m_Bt; + Real_ptr m_G; + Real_ptr m_Gt; + Real_ptr m_D; + Real_ptr m_X; + Real_ptr m_Y; + + Index_type m_NE; + Index_type m_NE_default; +}; + +} // end namespace apps +} // end namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/apps/DEL_DOT_VEC_2D-Cuda.cpp b/src/apps/DEL_DOT_VEC_2D-Cuda.cpp index c19d0770e..4373c194c 100644 --- a/src/apps/DEL_DOT_VEC_2D-Cuda.cpp +++ b/src/apps/DEL_DOT_VEC_2D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DEL_DOT_VEC_2D-Hip.cpp b/src/apps/DEL_DOT_VEC_2D-Hip.cpp index 782e4099c..9cae1ad99 100644 --- a/src/apps/DEL_DOT_VEC_2D-Hip.cpp +++ b/src/apps/DEL_DOT_VEC_2D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/DEL_DOT_VEC_2D-OMP.cpp b/src/apps/DEL_DOT_VEC_2D-OMP.cpp index 91250c796..fd2977595 100644 --- a/src/apps/DEL_DOT_VEC_2D-OMP.cpp +++ b/src/apps/DEL_DOT_VEC_2D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DEL_DOT_VEC_2D-OMPTarget.cpp b/src/apps/DEL_DOT_VEC_2D-OMPTarget.cpp index 479ab795e..2307f92b1 100644 --- a/src/apps/DEL_DOT_VEC_2D-OMPTarget.cpp +++ b/src/apps/DEL_DOT_VEC_2D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DEL_DOT_VEC_2D-Seq.cpp b/src/apps/DEL_DOT_VEC_2D-Seq.cpp index 07100da04..5a3080635 100644 --- a/src/apps/DEL_DOT_VEC_2D-Seq.cpp +++ b/src/apps/DEL_DOT_VEC_2D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 24121e157..04c2d29f0 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp index 60d577a05..0e22bb399 100644 --- a/src/apps/DEL_DOT_VEC_2D.hpp +++ b/src/apps/DEL_DOT_VEC_2D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DIFFUSION3DPA-Cuda.cpp b/src/apps/DIFFUSION3DPA-Cuda.cpp index 9ceafb94c..e3044a963 100644 --- a/src/apps/DIFFUSION3DPA-Cuda.cpp +++ b/src/apps/DIFFUSION3DPA-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -132,11 +132,11 @@ void DIFFUSION3DPA::runCudaVariantImpl(VariantID vid) { DIFFUSION3DPA_DATA_SETUP_CUDA; + dim3 nthreads_per_block(DPA_Q1D, DPA_Q1D, DPA_Q1D); + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - dim3 nthreads_per_block(DPA_Q1D, DPA_Q1D, DPA_Q1D); - Diffusion3DPA<<>>( Basis, dBasis, D, X, Y, symmetric); @@ -156,198 +156,198 @@ void DIFFUSION3DPA::runCudaVariantImpl(VariantID vid) { constexpr bool async = true; using launch_policy = - RAJA::expt::LaunchPolicy>; + RAJA::LaunchPolicy>; using outer_x = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_x = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_y = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_z = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(RAJA::expt::Teams(NE), - RAJA::expt::Threads(DPA_Q1D, DPA_Q1D, DPA_Q1D)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(DPA_Q1D, DPA_Q1D, DPA_Q1D)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { DIFFUSION3DPA_0_GPU; - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_1; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - 
RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_2; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_3; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_4; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_5; } // lambda (qx) - ); // 
RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int q) { DIFFUSION3DPA_6; } // lambda (q) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (d) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_7; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_8; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), 
[&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_9; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/DIFFUSION3DPA-Hip.cpp b/src/apps/DIFFUSION3DPA-Hip.cpp index 58a40a77e..08d4b9194 100644 --- a/src/apps/DIFFUSION3DPA-Hip.cpp +++ b/src/apps/DIFFUSION3DPA-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -158,198 +158,198 @@ void DIFFUSION3DPA::runHipVariantImpl(VariantID vid) { constexpr bool async = true; using launch_policy = - RAJA::expt::LaunchPolicy>; + RAJA::LaunchPolicy>; using outer_x = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_x = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_y = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; using inner_z = - RAJA::expt::LoopPolicy; + RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(RAJA::expt::Teams(NE), - RAJA::expt::Threads(DPA_Q1D, DPA_Q1D, DPA_Q1D)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(DPA_Q1D, DPA_Q1D, DPA_Q1D)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { DIFFUSION3DPA_0_GPU; - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_1; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_2; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); 
//RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_3; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_4; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_5; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 
DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int q) { DIFFUSION3DPA_6; } // lambda (q) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (d) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_7; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_8; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_9; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda 
(dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/DIFFUSION3DPA-OMP.cpp b/src/apps/DIFFUSION3DPA-OMP.cpp index 7d32b2b41..cc99f6a07 100644 --- a/src/apps/DIFFUSION3DPA-OMP.cpp +++ b/src/apps/DIFFUSION3DPA-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -115,194 +115,194 @@ void DIFFUSION3DPA::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t case RAJA_OpenMP: { // Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; - using inner_z = RAJA::expt::LoopPolicy; + using inner_z = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { DIFFUSION3DPA_0_CPU; - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - 
RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_1; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_2; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_3; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_4; } // lambda (qx) - ); 
// RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_5; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int q) { DIFFUSION3DPA_6; } // lambda (q) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (d) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_7; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), 
[&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_8; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_9; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/DIFFUSION3DPA-OMPTarget.cpp b/src/apps/DIFFUSION3DPA-OMPTarget.cpp index 16cff1087..befedda9c 100644 --- a/src/apps/DIFFUSION3DPA-OMPTarget.cpp +++ b/src/apps/DIFFUSION3DPA-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/DIFFUSION3DPA-Seq.cpp b/src/apps/DIFFUSION3DPA-Seq.cpp index a84b4bc61..420a94dab 100644 --- a/src/apps/DIFFUSION3DPA-Seq.cpp +++ b/src/apps/DIFFUSION3DPA-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -113,194 +113,194 @@ void DIFFUSION3DPA::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune case RAJA_Seq: { // Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; - using inner_z = RAJA::expt::LoopPolicy; + using inner_z = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { DIFFUSION3DPA_0_CPU; - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_1; } // lambda (dx) - ); // 
RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_2; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_3; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_4; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), 
[&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qx) { DIFFUSION3DPA_5; } // lambda (qx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + RAJA::loop(ctx, RAJA::RangeSegment(0, 1), [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int q) { DIFFUSION3DPA_6; } // lambda (q) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (d) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_7; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (qy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_8; } // lambda 
(dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (qz) - ); //RAJA::expt::loop + ); //RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), [&](int dx) { DIFFUSION3DPA_9; } // lambda (dx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (dz) - ); //RAJA::expt::loop + ); //RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 3844668c6..b59d21ad1 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/DIFFUSION3DPA.hpp b/src/apps/DIFFUSION3DPA.hpp index b0ba7c977..62967d5c0 100644 --- a/src/apps/DIFFUSION3DPA.hpp +++ b/src/apps/DIFFUSION3DPA.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/ENERGY-Cuda.cpp b/src/apps/ENERGY-Cuda.cpp index a99a928e3..606e11a9f 100644 --- a/src/apps/ENERGY-Cuda.cpp +++ b/src/apps/ENERGY-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/ENERGY-Hip.cpp b/src/apps/ENERGY-Hip.cpp index e7e882cff..b96e49c3a 100644 --- a/src/apps/ENERGY-Hip.cpp +++ b/src/apps/ENERGY-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/ENERGY-OMP.cpp b/src/apps/ENERGY-OMP.cpp index f06c2efe9..235386ff0 100644 --- a/src/apps/ENERGY-OMP.cpp +++ b/src/apps/ENERGY-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/ENERGY-OMPTarget.cpp b/src/apps/ENERGY-OMPTarget.cpp index 3027bd25f..4fb375d50 100644 --- a/src/apps/ENERGY-OMPTarget.cpp +++ b/src/apps/ENERGY-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/ENERGY-Seq.cpp b/src/apps/ENERGY-Seq.cpp index 5bc229c6e..68eb1b2dc 100644 --- a/src/apps/ENERGY-Seq.cpp +++ b/src/apps/ENERGY-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 9ed11381a..61799e150 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp index 6461fdd5f..22af34867 100644 --- a/src/apps/ENERGY.hpp +++ b/src/apps/ENERGY.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FEM_MACROS.hpp b/src/apps/FEM_MACROS.hpp index 474ada22b..f88e7b55d 100644 --- a/src/apps/FEM_MACROS.hpp +++ b/src/apps/FEM_MACROS.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/FIR-Cuda.cpp b/src/apps/FIR-Cuda.cpp index 4dea7c82e..c1590ecc3 100644 --- a/src/apps/FIR-Cuda.cpp +++ b/src/apps/FIR-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FIR-Hip.cpp b/src/apps/FIR-Hip.cpp index 42e3503e0..dd591498d 100644 --- a/src/apps/FIR-Hip.cpp +++ b/src/apps/FIR-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FIR-OMP.cpp b/src/apps/FIR-OMP.cpp index 7a5415130..5b3cc2a35 100644 --- a/src/apps/FIR-OMP.cpp +++ b/src/apps/FIR-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FIR-OMPTarget.cpp b/src/apps/FIR-OMPTarget.cpp index 90be7bd3a..318c98e51 100644 --- a/src/apps/FIR-OMPTarget.cpp +++ b/src/apps/FIR-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/FIR-Seq.cpp b/src/apps/FIR-Seq.cpp index 3a196a1f1..d5dba3ade 100644 --- a/src/apps/FIR-Seq.cpp +++ b/src/apps/FIR-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 8dd25358e..75c0e803a 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp index dd46d9934..3ca8a1cef 100644 --- a/src/apps/FIR.hpp +++ b/src/apps/FIR.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE-Cuda.cpp b/src/apps/HALOEXCHANGE-Cuda.cpp index cab4f911d..abd508490 100644 --- a/src/apps/HALOEXCHANGE-Cuda.cpp +++ b/src/apps/HALOEXCHANGE-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/HALOEXCHANGE-Hip.cpp b/src/apps/HALOEXCHANGE-Hip.cpp index 4070edc72..fd0a52a39 100644 --- a/src/apps/HALOEXCHANGE-Hip.cpp +++ b/src/apps/HALOEXCHANGE-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE-OMP.cpp b/src/apps/HALOEXCHANGE-OMP.cpp index daa1dbad8..050046479 100644 --- a/src/apps/HALOEXCHANGE-OMP.cpp +++ b/src/apps/HALOEXCHANGE-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE-OMPTarget.cpp b/src/apps/HALOEXCHANGE-OMPTarget.cpp index 4c8f1655c..488bd20d2 100644 --- a/src/apps/HALOEXCHANGE-OMPTarget.cpp +++ b/src/apps/HALOEXCHANGE-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/HALOEXCHANGE-Seq.cpp b/src/apps/HALOEXCHANGE-Seq.cpp index 755a47390..b6bd892ba 100644 --- a/src/apps/HALOEXCHANGE-Seq.cpp +++ b/src/apps/HALOEXCHANGE-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 890fcf0a9..0d8eac44f 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp index 5d653762a..50854612e 100644 --- a/src/apps/HALOEXCHANGE.hpp +++ b/src/apps/HALOEXCHANGE.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/HALOEXCHANGE_FUSED-Cuda.cpp b/src/apps/HALOEXCHANGE_FUSED-Cuda.cpp index 52d1fca5c..202f59305 100644 --- a/src/apps/HALOEXCHANGE_FUSED-Cuda.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE_FUSED-Hip.cpp b/src/apps/HALOEXCHANGE_FUSED-Hip.cpp index 7d64d86f7..6fe04a8c0 100644 --- a/src/apps/HALOEXCHANGE_FUSED-Hip.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE_FUSED-OMP.cpp b/src/apps/HALOEXCHANGE_FUSED-OMP.cpp index 0400c20b0..dc44e2aae 100644 --- a/src/apps/HALOEXCHANGE_FUSED-OMP.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/HALOEXCHANGE_FUSED-OMPTarget.cpp b/src/apps/HALOEXCHANGE_FUSED-OMPTarget.cpp index 7c465681c..e6e94a9e7 100644 --- a/src/apps/HALOEXCHANGE_FUSED-OMPTarget.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE_FUSED-Seq.cpp b/src/apps/HALOEXCHANGE_FUSED-Seq.cpp index 984aaf724..e7baa12de 100644 --- a/src/apps/HALOEXCHANGE_FUSED-Seq.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 406cc654b..b7c0bfd84 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index e47c1e14e..f9ec78d8d 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES-Cuda.cpp b/src/apps/LTIMES-Cuda.cpp index 4e38f769b..98d7a5a77 100644 --- a/src/apps/LTIMES-Cuda.cpp +++ b/src/apps/LTIMES-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES-Hip.cpp b/src/apps/LTIMES-Hip.cpp index a78394d25..4cb052a0f 100644 --- a/src/apps/LTIMES-Hip.cpp +++ b/src/apps/LTIMES-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES-OMP.cpp b/src/apps/LTIMES-OMP.cpp index 91d0faeac..3c565d656 100644 --- a/src/apps/LTIMES-OMP.cpp +++ b/src/apps/LTIMES-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES-OMPTarget.cpp b/src/apps/LTIMES-OMPTarget.cpp index e89e6cbfa..1db07b850 100644 --- a/src/apps/LTIMES-OMPTarget.cpp +++ b/src/apps/LTIMES-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES-Seq.cpp b/src/apps/LTIMES-Seq.cpp index 92fd7c319..d512af015 100644 --- a/src/apps/LTIMES-Seq.cpp +++ b/src/apps/LTIMES-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index ede451a0a..cad55c72e 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp index 31eae0f83..2f3f0ca6d 100644 --- a/src/apps/LTIMES.hpp +++ b/src/apps/LTIMES.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES_NOVIEW-Cuda.cpp b/src/apps/LTIMES_NOVIEW-Cuda.cpp index b363f0049..d51b541cc 100644 --- a/src/apps/LTIMES_NOVIEW-Cuda.cpp +++ b/src/apps/LTIMES_NOVIEW-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES_NOVIEW-Hip.cpp b/src/apps/LTIMES_NOVIEW-Hip.cpp index 47a8c8956..8ac5fafe1 100644 --- a/src/apps/LTIMES_NOVIEW-Hip.cpp +++ b/src/apps/LTIMES_NOVIEW-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES_NOVIEW-OMP.cpp b/src/apps/LTIMES_NOVIEW-OMP.cpp index e41853651..7ad8e85dc 100644 --- a/src/apps/LTIMES_NOVIEW-OMP.cpp +++ b/src/apps/LTIMES_NOVIEW-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/LTIMES_NOVIEW-OMPTarget.cpp b/src/apps/LTIMES_NOVIEW-OMPTarget.cpp index ca49c8859..cc8372352 100644 --- a/src/apps/LTIMES_NOVIEW-OMPTarget.cpp +++ b/src/apps/LTIMES_NOVIEW-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES_NOVIEW-Seq.cpp b/src/apps/LTIMES_NOVIEW-Seq.cpp index 7da062715..f025f8dc4 100644 --- a/src/apps/LTIMES_NOVIEW-Seq.cpp +++ b/src/apps/LTIMES_NOVIEW-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index c0c0f7413..9064f7c11 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp index 1385864fb..96a296366 100644 --- a/src/apps/LTIMES_NOVIEW.hpp +++ b/src/apps/LTIMES_NOVIEW.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/MASS3DPA-Cuda.cpp b/src/apps/MASS3DPA-Cuda.cpp index b872a2a3c..63a42fafc 100644 --- a/src/apps/MASS3DPA-Cuda.cpp +++ b/src/apps/MASS3DPA-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -114,11 +114,11 @@ void MASS3DPA::runCudaVariantImpl(VariantID vid) { MASS3DPA_DATA_SETUP_CUDA; + dim3 nthreads_per_block(MPA_Q1D, MPA_Q1D, 1); + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - dim3 nthreads_per_block(MPA_Q1D, MPA_Q1D, 1); - Mass3DPA<<>>(B, Bt, D, X, Y); cudaErrchk( cudaGetLastError() ); @@ -136,132 +136,132 @@ void MASS3DPA::runCudaVariantImpl(VariantID vid) { constexpr bool async = true; - using launch_policy = RAJA::expt::LaunchPolicy>; + using launch_policy = RAJA::LaunchPolicy>; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(RAJA::expt::Teams(NE), - RAJA::expt::Threads(MPA_Q1D, MPA_Q1D, 1)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(MPA_Q1D, MPA_Q1D, 1)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { MASS3DPA_0_GPU - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_1 } - ); // RAJA::expt::loop + ); // RAJA::loop - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { MASS3DPA_2 } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), 
+ RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_3 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_4 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_5 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { MASS3DPA_6 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_7 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_8 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_9 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/MASS3DPA-Hip.cpp b/src/apps/MASS3DPA-Hip.cpp index 804a858fa..1c9c10f60 100644 --- a/src/apps/MASS3DPA-Hip.cpp +++ b/src/apps/MASS3DPA-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -139,131 +139,131 @@ void MASS3DPA::runHipVariantImpl(VariantID vid) { constexpr bool async = true; - using launch_policy = RAJA::expt::LaunchPolicy>; + using launch_policy = RAJA::LaunchPolicy>; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(RAJA::expt::Teams(NE), - RAJA::expt::Threads(MPA_Q1D, MPA_Q1D, 1)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(NE), + RAJA::Threads(MPA_Q1D, MPA_Q1D, 1)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { MASS3DPA_0_GPU - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, 
RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_1 } - ); // RAJA::expt::loop + ); // RAJA::loop - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { MASS3DPA_2 } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_3 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_4 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_5 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { MASS3DPA_6 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, 
RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_7 } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_8 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_9 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/MASS3DPA-OMP.cpp b/src/apps/MASS3DPA-OMP.cpp index 49f74774b..3c6361786 100644 --- a/src/apps/MASS3DPA-OMP.cpp +++ b/src/apps/MASS3DPA-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -99,132 +99,132 @@ void MASS3DPA::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i case RAJA_OpenMP: { //Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { //Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { MASS3DPA_0_CPU - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_1 } - ); // RAJA::expt::loop + ); // RAJA::loop - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { MASS3DPA_2 } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_3 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), 
[&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_4 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_5 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { MASS3DPA_6 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_7 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_8 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_9 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // 
RAJA::expt::loop + ); // RAJA::loop } // lambda (e) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // // RAJA::expt::launch + ); // // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/MASS3DPA-OMPTarget.cpp b/src/apps/MASS3DPA-OMPTarget.cpp index 86021b52d..62fb1def4 100644 --- a/src/apps/MASS3DPA-OMPTarget.cpp +++ b/src/apps/MASS3DPA-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/MASS3DPA-Seq.cpp b/src/apps/MASS3DPA-Seq.cpp index d276c4a57..4f42abd50 100644 --- a/src/apps/MASS3DPA-Seq.cpp +++ b/src/apps/MASS3DPA-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -97,131 +97,131 @@ void MASS3DPA::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) case RAJA_Seq: { //Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + RAJA::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { MASS3DPA_0_CPU - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_1 } - ); // RAJA::expt::loop + ); // RAJA::loop - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { MASS3DPA_2 } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (dy) - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_3 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + 
RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_4 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { MASS3DPA_5 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { MASS3DPA_6 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_7 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_8 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), + RAJA::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { MASS3DPA_9 } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (e) - ); // RAJA::expt::loop 
+ ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index 288e7ff82..ab4a5095f 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/MASS3DPA.hpp b/src/apps/MASS3DPA.hpp index 0d1c3a42d..7365fa011 100644 --- a/src/apps/MASS3DPA.hpp +++ b/src/apps/MASS3DPA.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/NODAL_ACCUMULATION_3D-Cuda.cpp b/src/apps/NODAL_ACCUMULATION_3D-Cuda.cpp index e8aadcb2b..73dd1db0f 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-Cuda.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -109,7 +109,7 @@ void NODAL_ACCUMULATION_3D::runCudaVariantImpl(VariantID vid) NODAL_ACCUMULATION_3D_DATA_TEARDOWN_CUDA; } else { - std::cout << "\n NODAL_ACCUMULATION_3D : Unknown Cuda variant id = " << vid << std::endl; + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown Cuda variant id = " << vid << std::endl; } } diff --git a/src/apps/NODAL_ACCUMULATION_3D-Hip.cpp b/src/apps/NODAL_ACCUMULATION_3D-Hip.cpp index 09cea6211..22731ef69 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-Hip.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -109,7 +109,7 @@ void NODAL_ACCUMULATION_3D::runHipVariantImpl(VariantID vid) NODAL_ACCUMULATION_3D_DATA_TEARDOWN_HIP; } else { - std::cout << "\n NODAL_ACCUMULATION_3D : Unknown Hip variant id = " << vid << std::endl; + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown Hip variant id = " << vid << std::endl; } } diff --git a/src/apps/NODAL_ACCUMULATION_3D-OMP.cpp b/src/apps/NODAL_ACCUMULATION_3D-OMP.cpp index baaf60664..57fe4b861 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-OMP.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -133,7 +133,7 @@ void NODAL_ACCUMULATION_3D::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUS } default : { - std::cout << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/NODAL_ACCUMULATION_3D-OMPTarget.cpp b/src/apps/NODAL_ACCUMULATION_3D-OMPTarget.cpp index f19189c64..db75e1340 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-OMPTarget.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -116,7 +116,7 @@ void NODAL_ACCUMULATION_3D::runOpenMPTargetVariant(VariantID vid, size_t RAJAPER NODAL_ACCUMULATION_3D_DATA_TEARDOWN_OMP_TARGET; } else { - std::cout << "\n NODAL_ACCUMULATION_3D : Unknown OMP Target variant id = " << vid << std::endl; + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown OMP Target variant id = " << vid << std::endl; } } diff --git a/src/apps/NODAL_ACCUMULATION_3D-Seq.cpp b/src/apps/NODAL_ACCUMULATION_3D-Seq.cpp index 61449d0f6..9dc2e6b13 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-Seq.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -93,7 +93,7 @@ void NODAL_ACCUMULATION_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ #endif // RUN_RAJA_SEQ default : { - std::cout << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 5fd512fb7..c041d2dfa 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/NODAL_ACCUMULATION_3D.hpp b/src/apps/NODAL_ACCUMULATION_3D.hpp index a574f331a..5b0ce0d77 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.hpp +++ b/src/apps/NODAL_ACCUMULATION_3D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE-Cuda.cpp b/src/apps/PRESSURE-Cuda.cpp index 14ad2ae34..e725adda2 100644 --- a/src/apps/PRESSURE-Cuda.cpp +++ b/src/apps/PRESSURE-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/PRESSURE-Hip.cpp b/src/apps/PRESSURE-Hip.cpp index 03c9e04fb..4db2b623e 100644 --- a/src/apps/PRESSURE-Hip.cpp +++ b/src/apps/PRESSURE-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE-OMP.cpp b/src/apps/PRESSURE-OMP.cpp index 867e72586..0f0dd2e4e 100644 --- a/src/apps/PRESSURE-OMP.cpp +++ b/src/apps/PRESSURE-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE-OMPTarget.cpp b/src/apps/PRESSURE-OMPTarget.cpp index 8c25f44c7..7ff878c05 100644 --- a/src/apps/PRESSURE-OMPTarget.cpp +++ b/src/apps/PRESSURE-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE-Seq.cpp b/src/apps/PRESSURE-Seq.cpp index c2f79e977..22731d5c2 100644 --- a/src/apps/PRESSURE-Seq.cpp +++ b/src/apps/PRESSURE-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index df2cb744f..ea32d1b19 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp index 6421ce6b0..c0568a8e0 100644 --- a/src/apps/PRESSURE.hpp +++ b/src/apps/PRESSURE.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/VOL3D-Cuda.cpp b/src/apps/VOL3D-Cuda.cpp index 3f65c1b8a..42abea71b 100644 --- a/src/apps/VOL3D-Cuda.cpp +++ b/src/apps/VOL3D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/VOL3D-Hip.cpp b/src/apps/VOL3D-Hip.cpp index 70f121e09..6032649f5 100644 --- a/src/apps/VOL3D-Hip.cpp +++ b/src/apps/VOL3D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/VOL3D-OMP.cpp b/src/apps/VOL3D-OMP.cpp index 0f773876c..98a3183b4 100644 --- a/src/apps/VOL3D-OMP.cpp +++ b/src/apps/VOL3D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/VOL3D-OMPTarget.cpp b/src/apps/VOL3D-OMPTarget.cpp index 75d8fb2b0..61a403648 100644 --- a/src/apps/VOL3D-OMPTarget.cpp +++ b/src/apps/VOL3D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/VOL3D-Seq.cpp b/src/apps/VOL3D-Seq.cpp index bb4227280..6d6159873 100644 --- a/src/apps/VOL3D-Seq.cpp +++ b/src/apps/VOL3D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index fd2ebb5aa..9593b2735 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp index 9ddedbd19..aa6701855 100644 --- a/src/apps/VOL3D.hpp +++ b/src/apps/VOL3D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/WIP-COUPLE.cpp b/src/apps/WIP-COUPLE.cpp index 0f25f5ee0..33e505231 100644 --- a/src/apps/WIP-COUPLE.cpp +++ b/src/apps/WIP-COUPLE.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp index cdafcd5eb..33faa85cc 100644 --- a/src/apps/WIP-COUPLE.hpp +++ b/src/apps/WIP-COUPLE.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt new file mode 100644 index 000000000..5fdb52763 --- /dev/null +++ b/src/basic-kokkos/CMakeLists.txt @@ -0,0 +1,27 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. 
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+#include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic)
+# Kokkos variants of the basic kernels; INCLUDES below puts ../basic on the include path.
+blt_add_library(
+  NAME basic-kokkos
+  SOURCES
+  PI_ATOMIC-Kokkos.cpp
+  DAXPY-Kokkos.cpp
+  IF_QUAD-Kokkos.cpp
+  INIT3-Kokkos.cpp
+  INIT_VIEW1D-Kokkos.cpp
+  INIT_VIEW1D_OFFSET-Kokkos.cpp
+  MULADDSUB-Kokkos.cpp
+  NESTED_INIT-Kokkos.cpp
+  REDUCE3_INT-Kokkos.cpp
+  TRAP_INT-Kokkos.cpp
+  DAXPY_ATOMIC-Kokkos.cpp
+  INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic
+  DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
+  )
diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp
new file mode 100644
index 000000000..b8ab91cd1
--- /dev/null
+++ b/src/basic-kokkos/DAXPY-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// NOTE(review): DaxpyFunctor is not referenced by runKokkosVariant below.
+struct DaxpyFunctor {
+  Real_ptr x;
+  Real_ptr y;
+  Real_type a;
+  DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a)
+      : x(m_x), y(m_y), a(m_a) {}
+  void operator()(Index_type i) const { DAXPY_BODY; }
+};
+// Kokkos variant of DAXPY: times run_reps launches of y[i] += a * x[i] over [ibegin, iend).
+void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_DATA_SETUP;
+
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+      Kokkos::parallel_for(
+          "DAXPY-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+  default: {
+    getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  // Move data (i.e., pointer, KokkosView-wrapped pointer) back to the host from
+  // the device
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
new file mode 100644
index 000000000..9e74c4e0c
--- /dev/null
+++ b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
@@ -0,0 +1,70 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY_ATOMIC.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+// TODO(cleanup): debugging-only include, marked "Delete me" by its author;
+// remove once it is confirmed that nothing in this file depends on it.
+#include "RAJA/RAJA.hpp"
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of DAXPY_ATOMIC: times run_reps launches of an atomic y[i] += a * x[i].
+void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_ATOMIC_DATA_SETUP;
+  //
+  // Kokkos Views to wrap pointers declared in DAXPY_ATOMIC.hpp
+  //
+
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "DAXPY_ATOMIC_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            Kokkos::atomic_add(&y_view[i], a * x_view[i]);
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
new file mode 100644
index 000000000..19e916dac
--- /dev/null
+++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
@@ -0,0 +1,77 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "IF_QUAD.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of IF_QUAD: per element, x1/x2 = (-b -/+ sqrt(b*b - 4ac)) / (2a), or 0.0 when b*b - 4ac < 0.
+void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  IF_QUAD_DATA_SETUP;
+
+  // Instantiating views using getViewFromPointer for the IF_QUAD definition
+
+  auto a_view = getViewFromPointer(a, iend);
+  auto b_view = getViewFromPointer(b, iend);
+  auto c_view = getViewFromPointer(c, iend);
+  auto x1_view = getViewFromPointer(x1, iend);
+  auto x2_view = getViewFromPointer(x2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "IF_QUAD_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            Real_type s = b_view[i] * b_view[i] - 4.0 * a_view[i] * c_view[i];
+            if (s >= 0) {
+              s = sqrt(s);
+              x2_view[i] = (-b_view[i] + s) / (2.0 * a_view[i]);
+              x1_view[i] = (-b_view[i] - s) / (2.0 * a_view[i]);
+            } else {
+              x2_view[i] = 0.0;
+              x1_view[i] = 0.0;
+            }
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(b, b_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+  moveDataToHostFromKokkosView(x1, x1_view, iend);
+  moveDataToHostFromKokkosView(x2, x2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp
new file mode 100644
index 000000000..661180c7b
--- /dev/null
+++
b/src/basic-kokkos/INIT3-Kokkos.cpp
@@ -0,0 +1,73 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT3.hpp"
+#if defined (RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT3: out1[i] = out2[i] = out3[i] = -in1[i] - in2[i], timed over run_reps launches.
+void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  // Instantiating Views using getViewFromPointer for the INIT3 definition
+  // (i.e., INIT3.hpp)
+
+  INIT3_DATA_SETUP;
+
+  // getViewFromPointer takes the pointer as its first argument and the
+  // number of elements (here iend) as its second argument
+  //
+  auto out1_view = getViewFromPointer(out1, iend);
+  auto out2_view = getViewFromPointer(out2, iend);
+  auto out3_view = getViewFromPointer(out3, iend);
+  auto in1_view = getViewFromPointer(in1, iend);
+  auto in2_view = getViewFromPointer(in2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT3-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            out1_view[i] = out2_view[i] = out3_view[i] =
+                -in1_view[i] - in2_view[i];
+          });
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(out1, out1_view, iend);
+  moveDataToHostFromKokkosView(out2, out2_view, iend);
+  moveDataToHostFromKokkosView(out3, out3_view, iend);
+  moveDataToHostFromKokkosView(in1, in1_view, iend);
+  moveDataToHostFromKokkosView(in2, in2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
new file mode 100644
index 000000000..8c775a3b0
--- /dev/null
+++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
@@ -0,0 +1,58 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT_VIEW1D: a[i] = (i + 1) * v for i in [ibegin, iend), timed over run_reps launches.
+void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INIT_VIEW1D_DATA_SETUP;
+
+  auto a_view = getViewFromPointer(a, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT_VIEW1D_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            a_view[i] = (i + 1) * v;
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(a, a_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
new file mode 100644
index 000000000..9df018264
--- /dev/null
+++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
@@ -0,0 +1,60 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D_OFFSET.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT_VIEW1D_OFFSET: a[i - ibegin] = i * v for i in [ibegin, iend), with ibegin = 1.
+void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize() + 1;
+
+  INIT_VIEW1D_OFFSET_DATA_SETUP;
+  // NOTE(review): the view spans iend (= problem size + 1) elements, but the kernel only writes indices [0, iend - ibegin) - confirm `a` is allocated that large.
+  auto a_view = getViewFromPointer(a, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) { a_view[i - ibegin] = i * v; });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid
+              << std::endl;
+  }
+  }
+
+  // Move data from Kokkos View (on Device) back to Host
+  moveDataToHostFromKokkosView(a, a_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
new file mode 100644
index 000000000..49e890315
--- /dev/null
+++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
@@ -0,0 +1,72 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MULADDSUB.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of MULADDSUB: out1 = in1 * in2, out2 = in1 + in2, out3 = in1 - in2 (element-wise).
+void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MULADDSUB_DATA_SETUP;
+
+  // Define Kokkos Views that will wrap pointers defined in MULADDSUB.hpp
+  auto out1_view = getViewFromPointer(out1, iend);
+  auto out2_view = getViewFromPointer(out2, iend);
+  auto out3_view = getViewFromPointer(out3, iend);
+  auto in1_view = getViewFromPointer(in1, iend);
+  auto in2_view = getViewFromPointer(in2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // If SIMD really matters, consider using Kokkos SIMD
+      Kokkos::parallel_for(
+          "MULADDSUB-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            out1_view[i] = in1_view[i] * in2_view[i];
+            out2_view[i] = in1_view[i] + in2_view[i];
+            out3_view[i] = in1_view[i] - in2_view[i];
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl;
+  }
+  }
+  // Move each array back to the host from its Kokkos View, once per array
+  moveDataToHostFromKokkosView(out1, out1_view, iend);
+  moveDataToHostFromKokkosView(out2, out2_view, iend);
+  moveDataToHostFromKokkosView(out3, out3_view, iend);
+  moveDataToHostFromKokkosView(in1, in1_view, iend);
+  moveDataToHostFromKokkosView(in2, in2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
new file mode 100644
index
000000000..36929cead
--- /dev/null
+++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
@@ -0,0 +1,78 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "NESTED_INIT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos_Lambda variant: fills a 3-D view over `array` with 0.00000001 * i * j * k via an MDRange kernel, timed over run_reps launches.
+void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+
+  NESTED_INIT_DATA_SETUP;
+
+  // Wrap the nested init array pointer in a Kokkos View
+  // In a Kokkos View, array arguments for array boundaries go from outermost
+  // to innermost dimension sizes
+  // See the basic NESTED_INIT.hpp file for definition of NESTED_INIT
+
+  auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni);
+  // NOTE(review): the kernel below writes its own loop body and never calls
+  // this lambda; it is kept only because the original note said not to
+  // remove it -- confirm whether it can be dropped.
+  auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) {
+    NESTED_INIT_BODY;
+  };
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // MDRange can be optimized
+      Kokkos::parallel_for(
+          "NESTED_INIT KokkosSeq",
+          // Range policy to define amount of work to be done
+          Kokkos::MDRangePolicy<Kokkos::Rank<3>,
+                                // Execution space
+                                Kokkos::DefaultExecutionSpace>({0, 0, 0},
+                                                               {nk, nj, ni}),
+          // Loop body
+          KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) {
+            array_kokkos_view(k, j, i) = 0.00000001 * i * j * k;
+          });
+    }
+
+    Kokkos::fence();
+
+    stopTimer();
+    // Moves mirror data from GPU to CPU (void, i.e., no return type). In
+    // this moving of data back to Host, the layout is changed back to Layout
+    // Right, vs. the LayoutLeft of the GPU
+    moveDataToHostFromKokkosView(array, array_kokkos_view, nk, nj, ni);
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
new file mode 100644
index 000000000..233ca71af
--- /dev/null
+++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_ATOMIC.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos_Lambda variant: per rep, resets *pi, atomically accumulates dx / (1 + x * x) into a one-element view, copies it back and scales by 4.
+void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PI_ATOMIC_DATA_SETUP;
+
+  // Declare Kokkos View that will wrap the pointer defined in PI_ATOMIC.hpp
+  auto pi_view = getViewFromPointer(pi, 1);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // Initializing a value, pi, on the host
+      *pi = m_pi_init;
+
+      pi_view = getViewFromPointer(pi, 1);
+
+      Kokkos::parallel_for(
+          "PI_ATOMIC-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            double x = (double(i) + 0.5) * dx;
+            // Make a reference to the 0th element of a 1D view with one
+            // element
+            Kokkos::atomic_add(&pi_view(0), dx / (1.0 + x * x));
+          });
+      // Moving the data on the device (held in the KokkosView) BACK to the
+      // pointer, pi.
+      moveDataToHostFromKokkosView(pi, pi_view, 1);
+      *pi *= 4.0;
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
new file mode 100644
index 000000000..23c0ab6f4
--- /dev/null
+++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
@@ -0,0 +1,77 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE3_INT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <cstdint>
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos_Lambda variant: per rep, reduces vec_view to max/min/sum in one parallel_reduce and folds the results into m_vmax/m_vmin/m_vsum.
+void REDUCE3_INT::runKokkosVariant(VariantID vid,
+                                   size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  REDUCE3_INT_DATA_SETUP;
+
+  // Declare KokkosView that will wrap the pointer to a vector
+
+  auto vec_view = getViewFromPointer(vec, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Int_type max_value = m_vmax_init;
+      Int_type min_value = m_vmin_init;
+      Int_type sum = m_vsum_init;
+      // Qualified with Kokkos:: instead of relying on argument-dependent lookup
+      Kokkos::parallel_reduce(
+          "REDUCE3-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(const int64_t i, Int_type &tl_max, Int_type &tl_min,
+                        Int_type &tl_sum) {
+            Int_type vec_i = vec_view[i];
+            if (vec_i > tl_max)
+              tl_max = vec_i;
+            if (vec_i < tl_min)
+              tl_min = vec_i;
+            tl_sum += vec_i;
+          },
+          Kokkos::Max<Int_type>(max_value), Kokkos::Min<Int_type>(min_value),
+          sum);
+      m_vsum += static_cast<Int_type>(sum);
+      m_vmin = Kokkos::min(m_vmin, static_cast<Int_type>(min_value));
+      m_vmax = Kokkos::max(m_vmax, static_cast<Int_type>(max_value));
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(vec, vec_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
new file mode 100644
index 000000000..5cdb9060f
--- /dev/null
+++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRAP_INT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+
+//
+// Function used in TRAP_INT loop.
+//
+RAJA_INLINE
+// Returns 1 / sqrt((x - xp)^2 + (y - yp)^2).
+KOKKOS_FUNCTION
+Real_type trap_int_func(Real_type x, Real_type y, Real_type xp, Real_type yp) {
+  Real_type denom = (x - xp) * (x - xp) + (y - yp) * (y - yp);
+  denom = 1.0 / sqrt(denom);
+  return denom;
+}
+// Kokkos_Lambda variant: per rep, sums TRAP_INT_BODY over [ibegin, iend) with parallel_reduce and adds the result times h to m_sumx.
+void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  TRAP_INT_DATA_SETUP;
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Real_type trap_integral_val = m_sumx_init;
+
+      Kokkos::parallel_reduce(
+          "TRAP_INT_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(const int64_t i, Real_type &sumx){TRAP_INT_BODY},
+          trap_integral_val);
+
+      m_sumx += static_cast<Real_type>(trap_integral_val) * h;
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt
index ceeb1a502..3be6e0c3c 100644
--- a/src/basic/CMakeLists.txt
+++ b/src/basic/CMakeLists.txt
@@ -1,5 +1,5 @@
 ###############################################################################
-# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
 # and RAJA Performance Suite project contributors.
 # See the RAJAPerf/LICENSE file for details.
# diff --git a/src/basic/DAXPY-Cuda.cpp b/src/basic/DAXPY-Cuda.cpp index a87421c4f..1ffe98c6c 100644 --- a/src/basic/DAXPY-Cuda.cpp +++ b/src/basic/DAXPY-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY-Hip.cpp b/src/basic/DAXPY-Hip.cpp index 25810c19e..c9e365370 100644 --- a/src/basic/DAXPY-Hip.cpp +++ b/src/basic/DAXPY-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY-OMP.cpp b/src/basic/DAXPY-OMP.cpp index a57e1709d..afc0e653c 100644 --- a/src/basic/DAXPY-OMP.cpp +++ b/src/basic/DAXPY-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY-OMPTarget.cpp b/src/basic/DAXPY-OMPTarget.cpp index a3862d80a..6dcc2a1c0 100644 --- a/src/basic/DAXPY-OMPTarget.cpp +++ b/src/basic/DAXPY-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/DAXPY-Seq.cpp b/src/basic/DAXPY-Seq.cpp index 3a262561f..13e7d230f 100644 --- a/src/basic/DAXPY-Seq.cpp +++ b/src/basic/DAXPY-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 6d6133eb6..f9f3c9d2d 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -51,6 +51,8 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DAXPY::~DAXPY() diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index db8501e9f..bcaca8054 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/DAXPY_ATOMIC-Cuda.cpp b/src/basic/DAXPY_ATOMIC-Cuda.cpp index 1e8210bd2..23548ddc4 100644 --- a/src/basic/DAXPY_ATOMIC-Cuda.cpp +++ b/src/basic/DAXPY_ATOMIC-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY_ATOMIC-Hip.cpp b/src/basic/DAXPY_ATOMIC-Hip.cpp index a1e7a6465..3eca8eba4 100644 --- a/src/basic/DAXPY_ATOMIC-Hip.cpp +++ b/src/basic/DAXPY_ATOMIC-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY_ATOMIC-OMP.cpp b/src/basic/DAXPY_ATOMIC-OMP.cpp index b28330d7e..4d2f4db87 100644 --- a/src/basic/DAXPY_ATOMIC-OMP.cpp +++ b/src/basic/DAXPY_ATOMIC-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/DAXPY_ATOMIC-OMPTarget.cpp b/src/basic/DAXPY_ATOMIC-OMPTarget.cpp index 7b19b0cf7..3c05239bf 100644 --- a/src/basic/DAXPY_ATOMIC-OMPTarget.cpp +++ b/src/basic/DAXPY_ATOMIC-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY_ATOMIC-Seq.cpp b/src/basic/DAXPY_ATOMIC-Seq.cpp index 8eabef6cd..7d47cbebe 100644 --- a/src/basic/DAXPY_ATOMIC-Seq.cpp +++ b/src/basic/DAXPY_ATOMIC-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 1e5d4e00e..1b9017273 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -51,6 +51,8 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DAXPY_ATOMIC::~DAXPY_ATOMIC() diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index 909939a45..9c2890e48 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD-Cuda.cpp b/src/basic/IF_QUAD-Cuda.cpp index 66146371c..88b2e29bc 100644 --- a/src/basic/IF_QUAD-Cuda.cpp +++ b/src/basic/IF_QUAD-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/IF_QUAD-Hip.cpp b/src/basic/IF_QUAD-Hip.cpp index 6ded209a9..51eb4b29a 100644 --- a/src/basic/IF_QUAD-Hip.cpp +++ b/src/basic/IF_QUAD-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/IF_QUAD-OMP.cpp b/src/basic/IF_QUAD-OMP.cpp index 93ea37e88..e952f05fb 100644 --- a/src/basic/IF_QUAD-OMP.cpp +++ b/src/basic/IF_QUAD-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/IF_QUAD-OMPTarget.cpp b/src/basic/IF_QUAD-OMPTarget.cpp index ca0a4ac0a..34acba2a4 100644 --- a/src/basic/IF_QUAD-OMPTarget.cpp +++ b/src/basic/IF_QUAD-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/IF_QUAD-Seq.cpp b/src/basic/IF_QUAD-Seq.cpp index cb303701d..09227c748 100644 --- a/src/basic/IF_QUAD-Seq.cpp +++ b/src/basic/IF_QUAD-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 69396d330..57ef34f7a 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -55,6 +55,8 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } IF_QUAD::~IF_QUAD() diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index 4d2a22c22..f1f3e12a8 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST-Cuda.cpp b/src/basic/INDEXLIST-Cuda.cpp index 22e5fdaaf..bfb5399e7 100644 --- a/src/basic/INDEXLIST-Cuda.cpp +++ b/src/basic/INDEXLIST-Cuda.cpp @@ -306,7 +306,7 @@ void INDEXLIST::runCudaVariantImpl(VariantID vid) INDEXLIST_DATA_TEARDOWN_CUDA; } else { - std::cout << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-Hip.cpp b/src/basic/INDEXLIST-Hip.cpp index 1450244e8..9707f635d 100644 --- a/src/basic/INDEXLIST-Hip.cpp +++ b/src/basic/INDEXLIST-Hip.cpp @@ -306,7 +306,7 @@ void INDEXLIST::runHipVariantImpl(VariantID vid) INDEXLIST_DATA_TEARDOWN_HIP; } else { - std::cout << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-OMP.cpp b/src/basic/INDEXLIST-OMP.cpp index 681e62699..bb8f61654 100644 --- a/src/basic/INDEXLIST-OMP.cpp +++ b/src/basic/INDEXLIST-OMP.cpp @@ -193,7 +193,7 @@ void INDEXLIST::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ default : { ignore_unused(run_reps, ibegin, iend, x, list); - std::cout << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-OMPTarget.cpp b/src/basic/INDEXLIST-OMPTarget.cpp index 99f875b27..36980a013 100644 --- a/src/basic/INDEXLIST-OMPTarget.cpp +++ b/src/basic/INDEXLIST-OMPTarget.cpp @@ -86,7 +86,7 @@ void INDEXLIST::runOpenMPTargetVariant(VariantID vid, 
size_t RAJAPERF_UNUSED_ARG default : { ignore_unused(run_reps, ibegin, iend, x, list); - std::cout << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-Seq.cpp b/src/basic/INDEXLIST-Seq.cpp index e7bb7139b..25bc60e72 100644 --- a/src/basic/INDEXLIST-Seq.cpp +++ b/src/basic/INDEXLIST-Seq.cpp @@ -73,7 +73,7 @@ void INDEXLIST::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx #endif default : { - std::cout << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST_3LOOP-Cuda.cpp b/src/basic/INDEXLIST_3LOOP-Cuda.cpp index 22e263b4f..264000ba1 100644 --- a/src/basic/INDEXLIST_3LOOP-Cuda.cpp +++ b/src/basic/INDEXLIST_3LOOP-Cuda.cpp @@ -166,7 +166,7 @@ void INDEXLIST_3LOOP::runCudaVariantImpl(VariantID vid) INDEXLIST_3LOOP_DATA_TEARDOWN_CUDA; } else { - std::cout << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST_3LOOP-Hip.cpp b/src/basic/INDEXLIST_3LOOP-Hip.cpp index 205b662dd..3ec769058 100644 --- a/src/basic/INDEXLIST_3LOOP-Hip.cpp +++ b/src/basic/INDEXLIST_3LOOP-Hip.cpp @@ -188,7 +188,7 @@ void INDEXLIST_3LOOP::runHipVariantImpl(VariantID vid) INDEXLIST_3LOOP_DATA_TEARDOWN_HIP; } else { - std::cout << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST_3LOOP-OMP.cpp b/src/basic/INDEXLIST_3LOOP-OMP.cpp index 3ba12ea0a..d84736ef7 100644 --- a/src/basic/INDEXLIST_3LOOP-OMP.cpp +++ b/src/basic/INDEXLIST_3LOOP-OMP.cpp @@ -234,7 +234,7 @@ void INDEXLIST_3LOOP::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } default : { - std::cout << "\n 
INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST_3LOOP-OMPTarget.cpp b/src/basic/INDEXLIST_3LOOP-OMPTarget.cpp index d58dbe9e6..3acf16049 100644 --- a/src/basic/INDEXLIST_3LOOP-OMPTarget.cpp +++ b/src/basic/INDEXLIST_3LOOP-OMPTarget.cpp @@ -99,7 +99,7 @@ void INDEXLIST_3LOOP::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUS } default : { - std::cout << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST_3LOOP-Seq.cpp b/src/basic/INDEXLIST_3LOOP-Seq.cpp index 14f62a8a7..33c35585c 100644 --- a/src/basic/INDEXLIST_3LOOP-Seq.cpp +++ b/src/basic/INDEXLIST_3LOOP-Seq.cpp @@ -149,7 +149,7 @@ void INDEXLIST_3LOOP::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu #endif default : { - std::cout << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INIT3-Cuda.cpp b/src/basic/INIT3-Cuda.cpp index 212a1e3a2..2c1aaaeaa 100644 --- a/src/basic/INIT3-Cuda.cpp +++ b/src/basic/INIT3-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/INIT3-Hip.cpp b/src/basic/INIT3-Hip.cpp index af3276a7d..0275bc458 100644 --- a/src/basic/INIT3-Hip.cpp +++ b/src/basic/INIT3-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT3-OMP.cpp b/src/basic/INIT3-OMP.cpp index 8df233cc5..25d31585c 100644 --- a/src/basic/INIT3-OMP.cpp +++ b/src/basic/INIT3-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT3-OMPTarget.cpp b/src/basic/INIT3-OMPTarget.cpp index d2b5eb127..a64432836 100644 --- a/src/basic/INIT3-OMPTarget.cpp +++ b/src/basic/INIT3-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT3-Seq.cpp b/src/basic/INIT3-Seq.cpp index 1a1cb228a..a757497f1 100644 --- a/src/basic/INIT3-Seq.cpp +++ b/src/basic/INIT3-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index fc3fd024d..130fbc3b4 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -51,6 +51,8 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT3::~INIT3() diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 44f3622de..aed67bfeb 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D-Cuda.cpp b/src/basic/INIT_VIEW1D-Cuda.cpp index be7a0bf97..9e0900c62 100644 --- a/src/basic/INIT_VIEW1D-Cuda.cpp +++ b/src/basic/INIT_VIEW1D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D-Hip.cpp b/src/basic/INIT_VIEW1D-Hip.cpp index 6f9d41924..0a3c65d05 100644 --- a/src/basic/INIT_VIEW1D-Hip.cpp +++ b/src/basic/INIT_VIEW1D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D-OMP.cpp b/src/basic/INIT_VIEW1D-OMP.cpp index a0544574d..742270ff6 100644 --- a/src/basic/INIT_VIEW1D-OMP.cpp +++ b/src/basic/INIT_VIEW1D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D-OMPTarget.cpp b/src/basic/INIT_VIEW1D-OMPTarget.cpp index fba84b747..1a6105b0f 100644 --- a/src/basic/INIT_VIEW1D-OMPTarget.cpp +++ b/src/basic/INIT_VIEW1D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/INIT_VIEW1D-Seq.cpp b/src/basic/INIT_VIEW1D-Seq.cpp index f6df5969b..cf6ec9945 100644 --- a/src/basic/INIT_VIEW1D-Seq.cpp +++ b/src/basic/INIT_VIEW1D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index bd752aa06..dd52b057e 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,8 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT_VIEW1D::~INIT_VIEW1D() diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b51d38b79..f3770f69a 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET-Cuda.cpp b/src/basic/INIT_VIEW1D_OFFSET-Cuda.cpp index 2f7f6d34a..6a52ae296 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-Cuda.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D_OFFSET-Hip.cpp b/src/basic/INIT_VIEW1D_OFFSET-Hip.cpp index ae98f56ab..e7c4b4961 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-Hip.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D_OFFSET-OMP.cpp b/src/basic/INIT_VIEW1D_OFFSET-OMP.cpp index 23a1c4e6f..8fb7c0129 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-OMP.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp b/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp index a3091a076..2ece34afe 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D_OFFSET-Seq.cpp b/src/basic/INIT_VIEW1D_OFFSET-Seq.cpp index 8b4db722b..2f1d9b15b 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-Seq.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 165cd5544..2dcda4f4a 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -52,6 +52,8 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT_VIEW1D_OFFSET::~INIT_VIEW1D_OFFSET() diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index be597496d..d32f59c7b 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED-Cuda.cpp b/src/basic/MAT_MAT_SHARED-Cuda.cpp index 0f702b83f..8196d8951 100644 --- a/src/basic/MAT_MAT_SHARED-Cuda.cpp +++ b/src/basic/MAT_MAT_SHARED-Cuda.cpp @@ -202,87 +202,87 @@ void MAT_MAT_SHARED::runCudaVariantImpl(VariantID vid) constexpr bool async = true; - using launch_policy = RAJA::expt::LaunchPolicy>; + using launch_policy = RAJA::LaunchPolicy>; - using teams_x = RAJA::expt::LoopPolicy; + using teams_x = RAJA::LoopPolicy; - using teams_y = RAJA::expt::LoopPolicy; + using teams_y = RAJA::LoopPolicy; - using threads_x = RAJA::expt::LoopPolicy; + using threads_x = RAJA::LoopPolicy; - using threads_y = RAJA::expt::LoopPolicy; + using threads_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - 
RAJA::expt::Grid(RAJA::expt::Teams(Nx, Ny), - RAJA::expt::Threads(tile_size, tile_size)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(Nx, Ny), + RAJA::Threads(tile_size, tile_size)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + RAJA::loop(ctx, RAJA::RangeSegment(0, Ny), [&](Index_type by) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), + RAJA::loop(ctx, RAJA::RangeSegment(0, Nx), [&](Index_type bx) { MAT_MAT_SHARED_BODY_0(tile_size) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop for (Index_type k = 0; k < (tile_size + N - 1) / tile_size; k++) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); } // for (k) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + 
RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (bx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (by) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/basic/MAT_MAT_SHARED-Hip.cpp b/src/basic/MAT_MAT_SHARED-Hip.cpp index ac1b4fb7f..7840e4c68 100644 --- a/src/basic/MAT_MAT_SHARED-Hip.cpp +++ b/src/basic/MAT_MAT_SHARED-Hip.cpp @@ -207,86 +207,86 @@ void MAT_MAT_SHARED::runHipVariantImpl(VariantID vid) constexpr bool async = true; - using launch_policy = RAJA::expt::LaunchPolicy>; + using launch_policy = RAJA::LaunchPolicy>; - using teams_x = RAJA::expt::LoopPolicy; + using teams_x = RAJA::LoopPolicy; - using teams_y = RAJA::expt::LoopPolicy; + using teams_y = RAJA::LoopPolicy; - using threads_x = RAJA::expt::LoopPolicy; + using threads_x = RAJA::LoopPolicy; - using threads_y = RAJA::expt::LoopPolicy; + using threads_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::launch( - RAJA::expt::Grid(RAJA::expt::Teams(Nx, Ny), - RAJA::expt::Threads(tile_size, tile_size)), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch( + RAJA::LaunchParams(RAJA::Teams(Nx, Ny), + RAJA::Threads(tile_size, tile_size)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + RAJA::loop(ctx, RAJA::RangeSegment(0, Ny), [&](Index_type by) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), + RAJA::loop(ctx, RAJA::RangeSegment(0, Nx), [&](Index_type bx) { MAT_MAT_SHARED_BODY_0(tile_size) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + 
RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop for (Index_type k = 0; k < (tile_size + N - 1) / tile_size; k++) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); } // for (k) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, tile_size), + RAJA::loop(ctx, RAJA::RangeSegment(0, tile_size), [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(tile_size) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (bx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (by) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/basic/MAT_MAT_SHARED-OMP.cpp b/src/basic/MAT_MAT_SHARED-OMP.cpp index 484550704..aff737ea6 100644 --- a/src/basic/MAT_MAT_SHARED-OMP.cpp +++ b/src/basic/MAT_MAT_SHARED-OMP.cpp @@ -159,85 +159,85 @@ void MAT_MAT_SHARED::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( 
case RAJA_OpenMP: { //Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using outer_y = RAJA::expt::LoopPolicy; + using outer_y = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { //Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch(RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch(RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + RAJA::loop(ctx, RAJA::RangeSegment(0, Ny), [&](Index_type by) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), + RAJA::loop(ctx, RAJA::RangeSegment(0, Nx), [&](Index_type bx) { MAT_MAT_SHARED_BODY_0(TL_SZ) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, 
RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); } // for (k) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (bx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (by) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/basic/MAT_MAT_SHARED-Seq.cpp b/src/basic/MAT_MAT_SHARED-Seq.cpp index b412daa32..3696c1bd6 100644 --- a/src/basic/MAT_MAT_SHARED-Seq.cpp +++ b/src/basic/MAT_MAT_SHARED-Seq.cpp @@ -155,86 +155,86 @@ void MAT_MAT_SHARED::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun case RAJA_Seq: { - using launch_policy = RAJA::expt::LaunchPolicy; + using launch_policy = RAJA::LaunchPolicy; - using outer_x = RAJA::expt::LoopPolicy; + using outer_x = RAJA::LoopPolicy; - using outer_y = RAJA::expt::LoopPolicy; + using outer_y = RAJA::LoopPolicy; - using inner_x = RAJA::expt::LoopPolicy; + using inner_x = RAJA::LoopPolicy; - using inner_y = RAJA::expt::LoopPolicy; + using inner_y = RAJA::LoopPolicy; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { //Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch(RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::launch(RAJA::LaunchParams(), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + RAJA::loop(ctx, RAJA::RangeSegment(0, Ny), [&](Index_type by) { - RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, Nx), + RAJA::loop(ctx, RAJA::RangeSegment(0, Nx), [&](Index_type bx) { MAT_MAT_SHARED_BODY_0(TL_SZ) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; k++) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop ctx.teamSync(); } // for (k) - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + RAJA::loop(ctx, RAJA::RangeSegment(0, TL_SZ), [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(TL_SZ) } - ); // RAJA::expt::loop + ); // RAJA::loop } - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (bx) - ); // RAJA::expt::loop + ); // RAJA::loop } // lambda (by) - ); // RAJA::expt::loop + ); // RAJA::loop } // outer lambda (ctx) - ); // RAJA::expt::launch + ); // RAJA::launch } // loop over kernel reps stopTimer(); diff --git a/src/basic/MULADDSUB-Cuda.cpp 
b/src/basic/MULADDSUB-Cuda.cpp index 3d8254c07..7b94a846a 100644 --- a/src/basic/MULADDSUB-Cuda.cpp +++ b/src/basic/MULADDSUB-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/MULADDSUB-Hip.cpp b/src/basic/MULADDSUB-Hip.cpp index cb9076b38..73b83e85f 100644 --- a/src/basic/MULADDSUB-Hip.cpp +++ b/src/basic/MULADDSUB-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/MULADDSUB-OMP.cpp b/src/basic/MULADDSUB-OMP.cpp index 1204e9018..6c9bb2038 100644 --- a/src/basic/MULADDSUB-OMP.cpp +++ b/src/basic/MULADDSUB-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/MULADDSUB-OMPTarget.cpp b/src/basic/MULADDSUB-OMPTarget.cpp index 2048284b5..c2487b04f 100644 --- a/src/basic/MULADDSUB-OMPTarget.cpp +++ b/src/basic/MULADDSUB-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/MULADDSUB-Seq.cpp b/src/basic/MULADDSUB-Seq.cpp index e93da7871..5c23bc9b0 100644 --- a/src/basic/MULADDSUB-Seq.cpp +++ b/src/basic/MULADDSUB-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index d1c180b8e..1d5f1bfcc 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -51,6 +51,8 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } MULADDSUB::~MULADDSUB() diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index 30ad11a54..e604a34c8 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT-Cuda.cpp b/src/basic/NESTED_INIT-Cuda.cpp index 7528c5cec..ba804881c 100644 --- a/src/basic/NESTED_INIT-Cuda.cpp +++ b/src/basic/NESTED_INIT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/NESTED_INIT-Hip.cpp b/src/basic/NESTED_INIT-Hip.cpp index 49c050f6f..de17fdf02 100644 --- a/src/basic/NESTED_INIT-Hip.cpp +++ b/src/basic/NESTED_INIT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/NESTED_INIT-OMP.cpp b/src/basic/NESTED_INIT-OMP.cpp index 4471740df..4ce2aa1d0 100644 --- a/src/basic/NESTED_INIT-OMP.cpp +++ b/src/basic/NESTED_INIT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/NESTED_INIT-OMPTarget.cpp b/src/basic/NESTED_INIT-OMPTarget.cpp index 2c0b2389f..83f47cd7a 100644 --- a/src/basic/NESTED_INIT-OMPTarget.cpp +++ b/src/basic/NESTED_INIT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/NESTED_INIT-Seq.cpp b/src/basic/NESTED_INIT-Seq.cpp index 48da1b37a..0a9c81ff6 100644 --- a/src/basic/NESTED_INIT-Seq.cpp +++ b/src/basic/NESTED_INIT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index ef9550d97..4b9183245 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -62,6 +62,8 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } NESTED_INIT::~NESTED_INIT() diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index 13da52cf2..ccaf7079e 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC-Cuda.cpp b/src/basic/PI_ATOMIC-Cuda.cpp index 6f28f8c2a..00d30ce8d 100644 --- a/src/basic/PI_ATOMIC-Cuda.cpp +++ b/src/basic/PI_ATOMIC-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/PI_ATOMIC-Hip.cpp b/src/basic/PI_ATOMIC-Hip.cpp index 605696676..723a260b0 100644 --- a/src/basic/PI_ATOMIC-Hip.cpp +++ b/src/basic/PI_ATOMIC-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_ATOMIC-OMP.cpp b/src/basic/PI_ATOMIC-OMP.cpp index 4296ed845..7fd29bc28 100644 --- a/src/basic/PI_ATOMIC-OMP.cpp +++ b/src/basic/PI_ATOMIC-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_ATOMIC-OMPTarget.cpp b/src/basic/PI_ATOMIC-OMPTarget.cpp index 2a059f99b..2745e212a 100644 --- a/src/basic/PI_ATOMIC-OMPTarget.cpp +++ b/src/basic/PI_ATOMIC-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/PI_ATOMIC-Seq.cpp b/src/basic/PI_ATOMIC-Seq.cpp index 486201caa..da13269a4 100644 --- a/src/basic/PI_ATOMIC-Seq.cpp +++ b/src/basic/PI_ATOMIC-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 776883232..5810f7598 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -53,6 +53,8 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PI_ATOMIC::~PI_ATOMIC() diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index 10c674dda..803d6202f 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE-Cuda.cpp b/src/basic/PI_REDUCE-Cuda.cpp index 80c8fd3b4..3fb294cb1 100644 --- a/src/basic/PI_REDUCE-Cuda.cpp +++ b/src/basic/PI_REDUCE-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_REDUCE-Hip.cpp b/src/basic/PI_REDUCE-Hip.cpp index bb34ed37e..c0ec0becf 100644 --- a/src/basic/PI_REDUCE-Hip.cpp +++ b/src/basic/PI_REDUCE-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_REDUCE-OMP.cpp b/src/basic/PI_REDUCE-OMP.cpp index bc03012c3..44da3e5b5 100644 --- a/src/basic/PI_REDUCE-OMP.cpp +++ b/src/basic/PI_REDUCE-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/PI_REDUCE-OMPTarget.cpp b/src/basic/PI_REDUCE-OMPTarget.cpp index a942839b4..5dc133661 100644 --- a/src/basic/PI_REDUCE-OMPTarget.cpp +++ b/src/basic/PI_REDUCE-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_REDUCE-Seq.cpp b/src/basic/PI_REDUCE-Seq.cpp index 6d6f885fe..85302a11a 100644 --- a/src/basic/PI_REDUCE-Seq.cpp +++ b/src/basic/PI_REDUCE-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 16d0770ba..84c38ce67 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index c7cc3258a..49fca096d 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE3_INT-Cuda.cpp b/src/basic/REDUCE3_INT-Cuda.cpp index 0e7c645e7..16f0d4968 100644 --- a/src/basic/REDUCE3_INT-Cuda.cpp +++ b/src/basic/REDUCE3_INT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE3_INT-Hip.cpp b/src/basic/REDUCE3_INT-Hip.cpp index 8e92cb123..3fa0da3d5 100644 --- a/src/basic/REDUCE3_INT-Hip.cpp +++ b/src/basic/REDUCE3_INT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE3_INT-OMP.cpp b/src/basic/REDUCE3_INT-OMP.cpp index 0f759180b..5428d6087 100644 --- a/src/basic/REDUCE3_INT-OMP.cpp +++ b/src/basic/REDUCE3_INT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/REDUCE3_INT-OMPTarget.cpp b/src/basic/REDUCE3_INT-OMPTarget.cpp index 7db4bbdd6..8a1eba5f5 100644 --- a/src/basic/REDUCE3_INT-OMPTarget.cpp +++ b/src/basic/REDUCE3_INT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE3_INT-Seq.cpp b/src/basic/REDUCE3_INT-Seq.cpp index a3e42cf14..5481959f0 100644 --- a/src/basic/REDUCE3_INT-Seq.cpp +++ b/src/basic/REDUCE3_INT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index dee6d3a5e..d5cf9f4c2 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -56,6 +56,8 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } REDUCE3_INT::~REDUCE3_INT() diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index 93ad766c2..e82c2cf05 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT-Cuda.cpp b/src/basic/REDUCE_STRUCT-Cuda.cpp index 52bec116d..502d4ca79 100644 --- a/src/basic/REDUCE_STRUCT-Cuda.cpp +++ b/src/basic/REDUCE_STRUCT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/REDUCE_STRUCT-Hip.cpp b/src/basic/REDUCE_STRUCT-Hip.cpp index f72306107..2bffbc5fc 100644 --- a/src/basic/REDUCE_STRUCT-Hip.cpp +++ b/src/basic/REDUCE_STRUCT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/REDUCE_STRUCT-OMP.cpp b/src/basic/REDUCE_STRUCT-OMP.cpp index ec60e6919..7ac22faa2 100644 --- a/src/basic/REDUCE_STRUCT-OMP.cpp +++ b/src/basic/REDUCE_STRUCT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -140,7 +140,7 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } default : { - std::cout << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE_STRUCT-OMPTarget.cpp b/src/basic/REDUCE_STRUCT-OMPTarget.cpp index baa2b67d2..2c9b8fdb8 100644 --- a/src/basic/REDUCE_STRUCT-OMPTarget.cpp +++ b/src/basic/REDUCE_STRUCT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -27,29 +27,34 @@ namespace basic const size_t threads_per_team = 256; #define REDUCE_STRUCT_DATA_SETUP_OMP_TARGET \ + PointsType points; \ + points.N = getActualProblemSize(); \ +\ int hid = omp_get_initial_device(); \ int did = omp_get_default_device(); \ \ - allocAndInitHipDeviceData(points.x, m_x, points.N, did, hid); \ - allocAndInitHipDeviceData(points.y, m_y, points.N, did, hid); + allocAndInitOpenMPDeviceData(points.x, m_x, points.N, did, hid); \ + allocAndInitOpenMPDeviceData(points.y, m_y, points.N, did, hid); #define REDUCE_STRUCT_DATA_TEARDOWN_OMP_TARGET \ - deallocHipDeviceData(points.x); \ - deallocHipDeviceData(points.y); \ + deallocOpenMPDeviceData(points.x, did); \ + deallocOpenMPDeviceData(points.y, did); \ -void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid) +void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - REDUCE_STRUCT_DATA_SETUP; - if ( vid == Base_OpenMPTarget ) { REDUCE_STRUCT_DATA_SETUP_OMP_TARGET; + Real_ptr xa = points.x; + Real_ptr ya = points.y; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -57,7 +62,7 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid) Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; - #pragma omp target is_device_ptr(vec) device( did ) map(tofrom:xsum, xmin, xmax, ysum, ymin, ymax) + #pragma omp target is_device_ptr(xa, ya) device( did ) map(tofrom:xsum, xmin, xmax, ysum, ymin, ymax) #pragma omp teams distribute parallel for thread_limit(threads_per_team) schedule(static,1) \ reduction(+:xsum) \ reduction(min:xmin) \ @@ -66,7 +71,12 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid) reduction(min:ymin), \ reduction(max:ymax) for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE_STRUCT_BODY; + xsum += xa[i] ; + xmin = 
RAJA_MIN(xmin, xa[i]) ; + xmax = RAJA_MAX(xmax, xa[i]) ; + ysum += ya[i] ; + ymin = RAJA_MIN(ymin, ya[i]) ; + ymax = RAJA_MAX(ymax, ya[i]) ; } points.SetCenter(xsum/points.N, ysum/points.N); @@ -115,7 +125,7 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid) REDUCE_STRUCT_DATA_TEARDOWN_OMP_TARGET; } else { - std::cout << "\n REDUCE_STRUCT : Unknown OMP Target variant id = " << vid << std::endl; + getCout() << "\n REDUCE_STRUCT : Unknown OMP Target variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE_STRUCT-Seq.cpp b/src/basic/REDUCE_STRUCT-Seq.cpp index 71fe7a471..4958d962b 100644 --- a/src/basic/REDUCE_STRUCT-Seq.cpp +++ b/src/basic/REDUCE_STRUCT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -128,7 +128,7 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune #endif // RUN_RAJA_SEQ default : { - std::cout << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp index d5c33f906..4479d832e 100644 --- a/src/basic/REDUCE_STRUCT.cpp +++ b/src/basic/REDUCE_STRUCT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index b1d188ca1..425e7796e 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -38,7 +38,7 @@ #define REDUCE_STRUCT_DATA_SETUP \ - points points; \ + PointsType points; \ points.N = getActualProblemSize(); \ points.x = m_x; \ points.y = m_y; \ @@ -94,7 +94,7 @@ class REDUCE_STRUCT : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); - struct points{ + struct PointsType { Int_type N; Real_ptr x, y; @@ -114,7 +114,8 @@ class REDUCE_STRUCT : public KernelBase Real_type center[2] = {0.0,0.0}; Real_type xmin, xmax; Real_type ymin, ymax; - }; + }; + private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; @@ -122,7 +123,7 @@ class REDUCE_STRUCT : public KernelBase Real_type m_init_sum; Real_type m_init_min; Real_type m_init_max; - points m_points; + PointsType m_points; Real_type X_MIN = 0.0, X_MAX = 100.0; Real_type Y_MIN = 0.0, Y_MAX = 50.0; Real_type Lx = (X_MAX) - (X_MIN); diff --git a/src/basic/TRAP_INT-Cuda.cpp b/src/basic/TRAP_INT-Cuda.cpp index d2845cbfd..379a9cb34 100644 --- a/src/basic/TRAP_INT-Cuda.cpp +++ b/src/basic/TRAP_INT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/basic/TRAP_INT-Hip.cpp b/src/basic/TRAP_INT-Hip.cpp index 63101962f..c665c2d2c 100644 --- a/src/basic/TRAP_INT-Hip.cpp +++ b/src/basic/TRAP_INT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/TRAP_INT-OMP.cpp b/src/basic/TRAP_INT-OMP.cpp index bd4c3c24b..dadaa5baa 100644 --- a/src/basic/TRAP_INT-OMP.cpp +++ b/src/basic/TRAP_INT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/TRAP_INT-OMPTarget.cpp b/src/basic/TRAP_INT-OMPTarget.cpp index 53dab376a..62d9e085b 100644 --- a/src/basic/TRAP_INT-OMPTarget.cpp +++ b/src/basic/TRAP_INT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/TRAP_INT-Seq.cpp b/src/basic/TRAP_INT-Seq.cpp index 310d5e9ef..6e1cc6f19 100644 --- a/src/basic/TRAP_INT-Seq.cpp +++ b/src/basic/TRAP_INT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 3bf939f38..eaac3ffda 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -51,6 +51,8 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } TRAP_INT::~TRAP_INT() diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index 50acfeb79..e64932dbe 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 0e459fa62..47db79deb 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # diff --git a/src/common/CudaDataUtils.hpp b/src/common/CudaDataUtils.hpp index 5010f982e..53f10f6c0 100644 --- a/src/common/CudaDataUtils.hpp +++ b/src/common/CudaDataUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/DataUtils.cpp b/src/common/DataUtils.cpp index 6856d1f6c..ad607e413 100644 --- a/src/common/DataUtils.cpp +++ b/src/common/DataUtils.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/common/DataUtils.hpp b/src/common/DataUtils.hpp index 887f54f82..d3ca53cfd 100644 --- a/src/common/DataUtils.hpp +++ b/src/common/DataUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index 7b5db6887..85af7dbcf 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -41,6 +42,64 @@ namespace rajaperf { using namespace std; +namespace { + +#ifdef RAJA_PERFSUITE_ENABLE_MPI + +void Allreduce(const Checksum_type* send, Checksum_type* recv, int count, + MPI_Op op, MPI_Comm comm) +{ + if (op != MPI_SUM && op != MPI_MIN && op != MPI_MAX) { + getCout() << "\nUnsupported MPI_OP..." 
<< endl; + } + + if (Checksum_MPI_type == MPI_DATATYPE_NULL) { + + int rank = -1; + MPI_Comm_rank(comm, &rank); + int num_ranks = -1; + MPI_Comm_size(comm, &num_ranks); + + std::vector gather(count*num_ranks); + + MPI_Gather(send, count*sizeof(Checksum_type), MPI_BYTE, + gather.data(), count*sizeof(Checksum_type), MPI_BYTE, + 0, comm); + + if (rank == 0) { + + for (int i = 0; i < count; ++i) { + + Checksum_type val = gather[i]; + + for (int r = 1; r < num_ranks; ++r) { + if (op == MPI_SUM) { + val += gather[i + r*count]; + } else if (op == MPI_MIN) { + val = std::min(val, gather[i + r*count]); + } else if (op == MPI_MAX) { + val = std::max(val, gather[i + r*count]); + } + } + recv[i] = val; + } + + } + + MPI_Bcast(recv, count*sizeof(Checksum_type), MPI_BYTE, + 0, comm); + + } else { + + MPI_Allreduce(send, recv, count, Checksum_MPI_type, op, comm); + } + +} + +#endif + +} + Executor::Executor(int argc, char** argv) : run_params(argc, argv), reference_vid(NumVariants), @@ -1423,17 +1482,14 @@ void Executor::writeChecksumReport(ostream& file) } #ifdef RAJA_PERFSUITE_ENABLE_MPI - if (Checksum_MPI_type == MPI_DATATYPE_NULL) { - getCout() << "Checksum_MPI_type is invalid" << endl; - } // get stats for checksums std::vector> checksums_sum(variant_ids.size()); for (size_t iv = 0; iv < variant_ids.size(); ++iv) { size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]); checksums_sum[iv].resize(num_tunings, 0.0); - MPI_Allreduce(checksums[iv].data(), checksums_sum[iv].data(), num_tunings, - Checksum_MPI_type, MPI_SUM, MPI_COMM_WORLD); + Allreduce(checksums[iv].data(), checksums_sum[iv].data(), num_tunings, + MPI_SUM, MPI_COMM_WORLD); } std::vector> checksums_avg(variant_ids.size()); @@ -1464,12 +1520,12 @@ void Executor::writeChecksumReport(ostream& file) checksums_abs_diff_max[iv].resize(num_tunings, 0.0); checksums_abs_diff_sum[iv].resize(num_tunings, 0.0); - MPI_Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_min[iv].data(), num_tunings, - 
Checksum_MPI_type, MPI_MIN, MPI_COMM_WORLD); - MPI_Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_max[iv].data(), num_tunings, - Checksum_MPI_type, MPI_MAX, MPI_COMM_WORLD); - MPI_Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_sum[iv].data(), num_tunings, - Checksum_MPI_type, MPI_SUM, MPI_COMM_WORLD); + Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_min[iv].data(), num_tunings, + MPI_MIN, MPI_COMM_WORLD); + Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_max[iv].data(), num_tunings, + MPI_MAX, MPI_COMM_WORLD); + Allreduce(checksums_abs_diff[iv].data(), checksums_abs_diff_sum[iv].data(), num_tunings, + MPI_SUM, MPI_COMM_WORLD); } std::vector> checksums_abs_diff_avg(variant_ids.size()); @@ -1495,8 +1551,8 @@ void Executor::writeChecksumReport(ostream& file) for (size_t iv = 0; iv < variant_ids.size(); ++iv) { size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]); checksums_abs_diff_stddev[iv].resize(num_tunings, 0.0); - MPI_Allreduce(checksums_abs_diff_diff2avg2.data(), checksums_abs_diff_stddev.data(), num_tunings, - Checksum_MPI_type, MPI_SUM, MPI_COMM_WORLD); + Allreduce(checksums_abs_diff_diff2avg2[iv].data(), checksums_abs_diff_stddev[iv].data(), num_tunings, + MPI_SUM, MPI_COMM_WORLD); for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) { checksums_abs_diff_stddev[iv][tune_idx] = std::sqrt(checksums_abs_diff_stddev[iv][tune_idx] / num_ranks); } @@ -1564,7 +1620,7 @@ string Executor::getReportTitle(CSVRepMode mode, RunParams::CombinerOpt combiner title = string("Max "); } break; - default : { cout << "\n Unknown CSV combiner mode = " << combiner << endl; } + default : { getCout() << "\n Unknown CSV combiner mode = " << combiner << endl; } } switch ( mode ) { case CSVRepMode::Timing : { @@ -1606,7 +1662,7 @@ long double Executor::getReportDataEntry(CSVRepMode mode, retval = kern->getMaxTime(vid, tune_idx); } break; - default : { cout << "\n Unknown CSV combiner mode = " << combiner << 
endl; } + default : { getCout() << "\n Unknown CSV combiner mode = " << combiner << endl; } } break; } @@ -1630,7 +1686,7 @@ long double Executor::getReportDataEntry(CSVRepMode mode, kern->getMaxTime(vid, tune_idx); } break; - default : { cout << "\n Unknown CSV combiner mode = " << combiner << endl; } + default : { getCout() << "\n Unknown CSV combiner mode = " << combiner << endl; } } } else { retval = 0.0; diff --git a/src/common/Executor.hpp b/src/common/Executor.hpp index a4403f1eb..6bca5a1d2 100644 --- a/src/common/Executor.hpp +++ b/src/common/Executor.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/GPUUtils.hpp b/src/common/GPUUtils.hpp index 76362ee1c..eceabcfea 100644 --- a/src/common/GPUUtils.hpp +++ b/src/common/GPUUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/HipDataUtils.hpp b/src/common/HipDataUtils.hpp index a3871d31e..570f9619c 100644 --- a/src/common/HipDataUtils.hpp +++ b/src/common/HipDataUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index a07a6bbbb..764770ca7 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } +// Required for running Kokkos + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + setKokkosTuningDefinitions(vid); +#endif + break; + } default : { #if 0 @@ -238,6 +246,12 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + runKokkosVariant(vid, tune_idx); +#endif + } default : { #if 0 diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8d74d6e05..885650336 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -90,6 +90,10 @@ class KernelBase virtual void setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif +#if defined(RUN_KOKKOS) + virtual void setKokkosTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } +#endif // // Getter methods used to generate kernel execution summary @@ -230,6 +234,12 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif +#if defined(RUN_KOKKOS) + virtual void runKokkosVariant(VariantID vid, size_t tune_idx) + { + getCout() << "\n KernelBase: Unimplemented Kokkos variant id = " << vid << std::endl; + } +#endif protected: const RunParams& run_params; diff --git a/src/common/KokkosViewUtils.hpp b/src/common/KokkosViewUtils.hpp new file mode 100644 index 000000000..856fcb6f1 --- /dev/null +++ b/src/common/KokkosViewUtils.hpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Types and methods for managing Suite kernels, variants, features, etc.. +/// + +#ifndef KokkosViewUtils_HPP +#define KokkosViewUtils_HPP + +#include "Kokkos_Core.hpp" + +#include +#include + +namespace rajaperf { +template struct PointerOfNdimensions; + +template struct PointerOfNdimensions { + using type = PointedAt; +}; + +template struct PointerOfNdimensions { + using type = + typename PointerOfNdimensions::type *; +}; + +// This templated function is used to wrap pointers +// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views +// +template +auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... 
boundaries) -> + Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space> + +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // The boundaries parameter pack contains the array dimenions; + // An allocation is implicitly made here + device_view_type device_data_copy("StringName", boundaries...); + + mirror_view_type cpu_to_gpu_mirror = + Kokkos::create_mirror_view(device_data_copy); + + Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); + + Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); + + // Kokkos::View return type + + return device_data_copy; +} + +// This function will move data in a Kokkos::View back to host from device, +// and will be stored in the existing pointer(s) +template +void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, + Boundaries... 
boundaries) { + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // Layout is optimal for gpu, but data are actually located on CPU + mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); + + // Actual copying of the data from the gpu (my_view) back to the cpu + Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); + + // This copies from the mirror on the host cpu back to the existing + // pointer(s) + Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); +} + +} // namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/common/OpenMPTargetDataUtils.hpp b/src/common/OpenMPTargetDataUtils.hpp index fc36cef1f..633f7ec51 100644 --- a/src/common/OpenMPTargetDataUtils.hpp +++ b/src/common/OpenMPTargetDataUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/OutputUtils.cpp b/src/common/OutputUtils.cpp index 96b09c542..54160cf17 100644 --- a/src/common/OutputUtils.cpp +++ b/src/common/OutputUtils.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -31,19 +31,9 @@ namespace rajaperf */ std::string recursiveMkdir(const std::string& in_path) { -#ifdef RAJA_PERFSUITE_ENABLE_MPI - int rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - // Processes wait for rank 0 to make the directories before proceeding - if (rank != 0) { - MPI_Barrier(MPI_COMM_WORLD); - } -#endif - - std::string dir; - std::string path = in_path; + + // remove leading "." or "./" if ( !path.empty() ) { if ( path.at(0) == '.' ) { if ( path.length() > 2 && path.at(1) == '/' ) { @@ -56,6 +46,16 @@ std::string recursiveMkdir(const std::string& in_path) if ( path.empty() ) return std::string(); +#ifdef RAJA_PERFSUITE_ENABLE_MPI + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + // Processes wait for rank 0 to make the directories before proceeding + if (rank != 0) { + MPI_Barrier(MPI_COMM_WORLD); + } +#endif + // ---------------------------------------- std::string outpath = path; diff --git a/src/common/OutputUtils.hpp b/src/common/OutputUtils.hpp index 6ba77a408..5641401e9 100644 --- a/src/common/OutputUtils.hpp +++ b/src/common/OutputUtils.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 1bc10c31d..27650cf56 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -78,6 +78,7 @@ // // Apps kernels... 
// +#include "apps/CONVECTION3DPA.hpp" #include "apps/WIP-COUPLE.hpp" #include "apps/DEL_DOT_VEC_2D.hpp" #include "apps/DIFFUSION3DPA.hpp" @@ -99,6 +100,8 @@ #include "algorithm/SORT.hpp" #include "algorithm/SORTPAIRS.hpp" #include "algorithm/REDUCE_SUM.hpp" +#include "algorithm/MEMSET.hpp" +#include "algorithm/MEMCPY.hpp" #include @@ -211,6 +214,7 @@ static const std::string KernelNames [] = // // Apps kernels... // + std::string("Apps_CONVECTION3DPA"), std::string("Apps_COUPLE"), std::string("Apps_DEL_DOT_VEC_2D"), std::string("Apps_DIFFUSION3DPA"), @@ -232,6 +236,8 @@ static const std::string KernelNames [] = std::string("Algorithm_SORT"), std::string("Algorithm_SORTPAIRS"), std::string("Algorithm_REDUCE_SUM"), + std::string("Algorithm_MEMSET"), + std::string("Algorithm_MEMCPY"), std::string("Unknown Kernel") // Keep this at the end and DO NOT remove.... @@ -272,6 +278,8 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Kokkos_Lambda"), + std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... }; // END VariantNames @@ -416,6 +424,12 @@ bool isVariantAvailable(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if ( vid == Kokkos_Lambda ) { + ret_val = true; + } +#endif + return ret_val; } @@ -471,6 +485,12 @@ bool isVariantGPU(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if ( vid == Kokkos_Lambda ) { + ret_val = true; + } +#endif + return ret_val; } @@ -699,6 +719,10 @@ KernelBase* getKernelObject(KernelID kid, // // Apps kernels... 
// + case Apps_CONVECTION3DPA : { + kernel = new apps::CONVECTION3DPA(run_params); + break; + } case Apps_COUPLE : { kernel = new apps::COUPLE(run_params); break; @@ -771,6 +795,14 @@ KernelBase* getKernelObject(KernelID kid, kernel = new algorithm::REDUCE_SUM(run_params); break; } + case Algorithm_MEMSET: { + kernel = new algorithm::MEMSET(run_params); + break; + } + case Algorithm_MEMCPY: { + kernel = new algorithm::MEMCPY(run_params); + break; + } default: { getCout() << "\n Unknown Kernel ID = " << kid << std::endl; diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index fad672137..e73bd9888 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -7,7 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// /// -/// Tyoes and methods for managing Suite kernels, variants, features, etc.. +/// Types and methods for managing Suite kernels, variants, features, etc.. /// #ifndef RAJAPerfSuite_HPP @@ -131,6 +131,7 @@ enum KernelID { // // Apps kernels... // + Apps_CONVECTION3DPA, Apps_COUPLE, Apps_DEL_DOT_VEC_2D, Apps_DIFFUSION3DPA, @@ -152,6 +153,8 @@ enum KernelID { Algorithm_SORT, Algorithm_SORTPAIRS, Algorithm_REDUCE_SUM, + Algorithm_MEMSET, + Algorithm_MEMCPY, NumKernels // Keep this one last and NEVER comment out (!!) @@ -191,6 +194,8 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Kokkos_Lambda, + NumVariants // Keep this one last and NEVER comment out (!!) 
}; diff --git a/src/common/RPTypes.hpp b/src/common/RPTypes.hpp index d9a2865b3..b86f6b7b6 100644 --- a/src/common/RPTypes.hpp +++ b/src/common/RPTypes.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp index 26cc63742..e32bd95da 100644 --- a/src/common/RunParams.cpp +++ b/src/common/RunParams.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -330,7 +330,7 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) got_someting = true; int gpu_block_size = ::atoi( opt.c_str() ); if ( gpu_block_size <= 0 ) { - std::cout << "\nBad input:" + getCout() << "\nBad input:" << " must give --gpu_block_size POSITIVE values (int)" << std::endl; input_state = BadInput; @@ -341,7 +341,7 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) } } if (!got_someting) { - std::cout << "\nBad input:" + getCout() << "\nBad input:" << " must give --gpu_block_size one or more values (int)" << std::endl; input_state = BadInput; diff --git a/src/common/RunParams.hpp b/src/common/RunParams.hpp index d0e7d81bf..b9e808533 100644 --- a/src/common/RunParams.hpp +++ b/src/common/RunParams.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance 
Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals-kokkos/CMakeLists.txt b/src/lcals-kokkos/CMakeLists.txt new file mode 100644 index 000000000..47e5b48c8 --- /dev/null +++ b/src/lcals-kokkos/CMakeLists.txt @@ -0,0 +1,26 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + + +blt_add_library( + NAME lcals-kokkos + SOURCES + DIFF_PREDICT-Kokkos.cpp + EOS-Kokkos.cpp + FIRST_DIFF-Kokkos.cpp + FIRST_MIN-Kokkos.cpp + FIRST_SUM-Kokkos.cpp + GEN_LIN_RECUR-Kokkos.cpp + HYDRO_1D-Kokkos.cpp + HYDRO_2D-Kokkos.cpp + INT_PREDICT-Kokkos.cpp + PLANCKIAN-Kokkos.cpp + TRIDIAG_ELIM-Kokkos.cpp + INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../lcals + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp b/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp new file mode 100644 index 000000000..4c7dd6b39 --- /dev/null +++ b/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DIFF_PREDICT.hpp" + +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include + +namespace rajaperf { +namespace lcals { + +void DIFF_PREDICT::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DIFF_PREDICT_DATA_SETUP; + + // Wrapping pointers in Kokkos Views + // Nota bene: get the actual array size to catch errors + + auto px_flat_view = getViewFromPointer(px, iend * 14); + auto cx_flat_view = getViewFromPointer(cx, iend * 14); + + // 2D View w/ runtime and compile time dimension + Kokkos::View px_view(px_flat_view.data(), + iend); + Kokkos::View cx_view(cx_flat_view.data(), + iend); + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "DIFF_PREDICT_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + // DIFF_PREDICT_BODY with Kokkos Views + Real_type ar, br, cr; + + ar = cx_view(i, 4); + br = ar - px_view(i, 4); + px_view(i, 4) = ar; + cr = br - px_view(i, 5); + px_view(i, 5) = br; + ar = cr - px_view(i, 6); + px_view(i, 6) = cr; + br = ar - px_view(i, 7); + px_view(i, 7) = ar; + cr = br - px_view(i, 8); + px_view(i, 8) = br; + ar = cr - px_view(i, 9); + px_view(i, 9) = cr; + br = ar - px_view(i, 10); + px_view(i, 10) = ar; + cr = br - px_view(i, 11); + px_view(i, 11) = br; + px_view(i, 13) = cr - px_view(i, 12); + px_view(i, 12) = cr; + }); + } + Kokkos::fence(); + stopTimer(); + break; + } + + default: { + std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(px, px_flat_view, iend * 14); + moveDataToHostFromKokkosView(cx, cx_flat_view, iend 
* 14); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif diff --git a/src/lcals-kokkos/EOS-Kokkos.cpp b/src/lcals-kokkos/EOS-Kokkos.cpp new file mode 100644 index 000000000..be30c0b60 --- /dev/null +++ b/src/lcals-kokkos/EOS-Kokkos.cpp @@ -0,0 +1,68 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "EOS.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include + +namespace rajaperf { +namespace lcals { + +void EOS::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + EOS_DATA_SETUP; + + // Wrap pointers in Kokkos Views + auto x_view = getViewFromPointer(x, iend + 7); + auto y_view = getViewFromPointer(y, iend + 7); + auto z_view = getViewFromPointer(z, iend + 7); + auto u_view = getViewFromPointer(u, iend + 7); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "EOS_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + x_view[i] = + u_view[i] + r * (z_view[i] + r * y_view[i]) + + t * (u_view[i + 3] + r * (u_view[i + 2] + r * u_view[i + 1]) + + t * (u_view[i + 6] + + q * (u_view[i + 5] + q * u_view[i + 4]))); + }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n EOS : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(x, x_view, iend + 7); + moveDataToHostFromKokkosView(y, y_view, iend + 7); + 
moveDataToHostFromKokkosView(z, z_view, iend + 7); + moveDataToHostFromKokkosView(u, u_view, iend + 7); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp b/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp new file mode 100644 index 000000000..02ae5097e --- /dev/null +++ b/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp @@ -0,0 +1,63 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIRST_DIFF.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include + +namespace rajaperf { +namespace lcals { + +void FIRST_DIFF::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + FIRST_DIFF_DATA_SETUP; + + // Wrap pointers in Kokkos Views + auto x_view = getViewFromPointer(x, iend + 1); + auto y_view = getViewFromPointer(y, iend + 1); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "FIRST_DIFF_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + x_view[i] = y_view[i + 1] - y_view[i]; + }); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; + } + } + + // View dimensions must match array dimensions! 
+  moveDataToHostFromKokkosView(x, x_view, iend + 1);
+  moveDataToHostFromKokkosView(y, y_view, iend + 1);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+#endif // RUN_KOKKOS
diff --git a/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp b/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp
new file mode 100644
index 000000000..cd2957436
--- /dev/null
+++ b/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp
@@ -0,0 +1,77 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_MIN.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace lcals {
+// Kokkos variant of FIRST_MIN; only Kokkos_Lambda is implemented.
+void FIRST_MIN::runKokkosVariant(VariantID vid,
+                                 size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_MIN_DATA_SETUP;
+
+  // Wrap the host pointer x in a Kokkos View holding iend elements
+  auto x_view = getViewFromPointer(x, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();  // drain pending device work before timing starts
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // MinLoc's third template argument, `Kokkos::HostSpace`, is the memory
+      // space where the result is stored: the host, which launches the kernel.
+      // NOTE(review): value/index types assumed Real_type/Index_type; confirm.
+      using reducer_type =
+          Kokkos::MinLoc<Real_type, Index_type, Kokkos::HostSpace>;
+      // value_type pairs the minimum value (val) with its index (loc);
+      // min_result_obj receives the result of parallel_reduce.
+      reducer_type::value_type min_result_obj;
+
+      Kokkos::parallel_reduce(
+          "FIRST_MIN_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i, reducer_type::value_type & mymin) {
+            if (x_view[i] < mymin.val) {
+              mymin.val = x_view[i];
+              mymin.loc = i;
+            }
+
+            // The MinLoc reducer combines the partial mymin results
+          },
+          reducer_type(min_result_obj));
+
+      m_minloc = min_result_obj.loc;  // keep only the index of the minimum
+    }
+    Kokkos::fence();  // wait for the last launch before stopping the timer
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+#endif // RUN_KOKKOS
diff --git a/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp b/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp
new file mode 100644
index 000000000..b7da76fd0
--- /dev/null
+++ b/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp
@@ -0,0 +1,62 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_SUM.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include
+
+namespace rajaperf {
+namespace lcals {
+
+void FIRST_SUM::runKokkosVariant(VariantID vid,
+                                 size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_SUM_DATA_SETUP;
+
+  // Wrap pointers in Kokkos Views
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "FIRST_SUM_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            x_view[i] = y_view[i - 1] + y_view[i];
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n FIRST_SUM :
Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(x, x_view, iend); + moveDataToHostFromKokkosView(y, y_view, iend); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp b/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp new file mode 100644 index 000000000..00960c3aa --- /dev/null +++ b/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp @@ -0,0 +1,80 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "GEN_LIN_RECUR.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include + +namespace rajaperf { +namespace lcals { + +void GEN_LIN_RECUR::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize(); + + GEN_LIN_RECUR_DATA_SETUP; + + // Wrap pointers in Kokkos Views + + auto b5_view = getViewFromPointer(b5, iend); + auto sa_view = getViewFromPointer(sa, iend); + auto sb_view = getViewFromPointer(sb, iend); + auto stb5_view = getViewFromPointer(stb5, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "GEN_LIN_RECUR_Kokkos Kokkos Lambda -- BODY1", + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(Index_type k) { + b5_view[k + kb5i] = sa_view[k] + stb5_view[k] * sb_view[k]; + stb5_view[k] = b5_view[k + kb5i] - stb5_view[k]; + }); + + Kokkos::parallel_for( + "GEN_LIN_RECUR_Kokkos Kokkos Lambda -- BODY2", + Kokkos::RangePolicy(1, N + 1), + 
KOKKOS_LAMBDA(Index_type i) { + Index_type k = N - i; + + b5_view[k + kb5i] = sa_view[k] + stb5_view[k] * sb_view[k]; + stb5_view[k] = b5_view[k + kb5i] - stb5_view[k]; + }); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid + << std::endl; + } + } + + moveDataToHostFromKokkosView(b5, b5_view, iend); + moveDataToHostFromKokkosView(sa, sa_view, iend); + moveDataToHostFromKokkosView(sb, sb_view, iend); + moveDataToHostFromKokkosView(stb5, stb5_view, iend); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp b/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp new file mode 100644 index 000000000..20e05fde4 --- /dev/null +++ b/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp @@ -0,0 +1,66 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_1D.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace lcals {
+// Kokkos variant of HYDRO_1D; only Kokkos_Lambda is implemented.
+void HYDRO_1D::runKokkosVariant(VariantID vid,
+                                size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  HYDRO_1D_DATA_SETUP;
+  // Copy x, y, z into device Views of iend + 12 elements; z is read at i + 11.
+  // NOTE(review): confirm the host arrays really hold iend + 12 elements.
+  auto x_view = getViewFromPointer(x, iend + 12);
+  auto y_view = getViewFromPointer(y, iend + 12);
+  auto z_view = getViewFromPointer(z, iend + 12);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();  // drain pending device work before timing starts
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "HYDRO_1D_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            x_view[i] =
+                q + y_view[i] * (r * z_view[i + 10] + t * z_view[i + 11]);
+          });
+    }
+
+    Kokkos::fence();  // wait for the last launch before stopping the timer
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl;
+  }
+  }
+  // Copy all three Views back into the host arrays (x is the one written).
+  moveDataToHostFromKokkosView(x, x_view, iend + 12);
+  moveDataToHostFromKokkosView(y, y_view, iend + 12);
+  moveDataToHostFromKokkosView(z, z_view, iend + 12);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+#endif // RUN_KOKKOS
diff --git a/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp b/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp
new file mode 100644
index 000000000..45761b11e
--- /dev/null
+++ b/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp
@@ -0,0 +1,129 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_2D.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace lcals {
+// Kokkos variant of HYDRO_2D; only Kokkos_Lambda is implemented.
+void HYDRO_2D::runKokkosVariant(VariantID vid,
+                                size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+
+  const Index_type run_reps = getRunReps();
+  const Index_type kbeg = 1;  // interior only: the stencils read k - 1, k + 1
+  const Index_type kend = m_kn - 1;
+  const Index_type jbeg = 1;  // interior only: the stencils read j - 1, j + 1
+  const Index_type jend = m_jn - 1;
+
+  HYDRO_2D_DATA_SETUP;
+
+  // Wrap the host arrays in 2D Kokkos::Views with extents (kn, jn)
+  auto zadat_view = getViewFromPointer(zadat, kn, jn);
+  auto zbdat_view = getViewFromPointer(zbdat, kn, jn);
+  auto zmdat_view = getViewFromPointer(zmdat, kn, jn);
+  auto zpdat_view = getViewFromPointer(zpdat, kn, jn);
+  auto zqdat_view = getViewFromPointer(zqdat, kn, jn);
+  auto zrdat_view = getViewFromPointer(zrdat, kn, jn);
+  auto zudat_view = getViewFromPointer(zudat, kn, jn);
+  auto zvdat_view = getViewFromPointer(zvdat, kn, jn);
+  auto zzdat_view = getViewFromPointer(zzdat, kn, jn);
+
+  // Wrap the two output arrays (written by BODY3) the same way
+  auto zroutdat_view = getViewFromPointer(zroutdat, kn, jn);
+  auto zzoutdat_view = getViewFromPointer(zzoutdat, kn, jn);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();  // drain pending device work before timing starts
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // Each of the three kernel bodies is a separate launch over the 2D
+      // interior index range, expressed as a rank-2 MDRangePolicy.
+      Kokkos::parallel_for(
+          "HYDRO_2D_Kokkos Kokkos_Lambda--BODY1",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg, jbeg}, {kend, jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+            zadat_view(k, j) =
+                (zpdat_view(k + 1, j - 1) + zqdat_view(k + 1, j - 1) -
+                 zpdat_view(k, j - 1) - zqdat_view(k, j - 1)) *
+                (zrdat_view(k, j) + zrdat_view(k, j - 1)) /
+                (zmdat_view(k, j - 1) + zmdat_view(k + 1, j - 1));
+
+            zbdat_view(k, j) = (zpdat_view(k, j - 1) + zqdat_view(k, j - 1) -
+                                zpdat_view(k, j) - zqdat_view(k, j)) *
+                               (zrdat_view(k, j) + zrdat_view(k - 1, j)) /
+                               (zmdat_view(k, j) + zmdat_view(k, j - 1));
+          });
+
+      Kokkos::parallel_for(
+          "HYDRO_2D_Kokkos Kokkos_Lambda--BODY2",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg, jbeg}, {kend, jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+            zudat_view(k, j) +=
+                s *
+                (zadat_view(k, j) * (zzdat_view(k, j) - zzdat_view(k, j + 1)) -
+                 zadat_view(k, j - 1) *
+                     (zzdat_view(k, j) - zzdat_view(k, j - 1)) -
+                 zbdat_view(k, j) * (zzdat_view(k, j) - zzdat_view(k - 1, j)) +
+                 zbdat_view(k + 1, j) *
+                     (zzdat_view(k, j) - zzdat_view(k + 1, j)));
+            zvdat_view(k, j) +=
+                s *
+                (zadat_view(k, j) * (zrdat_view(k, j) - zrdat_view(k, j + 1)) -
+                 zadat_view(k, j - 1) *
+                     (zrdat_view(k, j) - zrdat_view(k, j - 1)) -
+                 zbdat_view(k, j) * (zrdat_view(k, j) - zrdat_view(k - 1, j)) +
+                 zbdat_view(k + 1, j) *
+                     (zrdat_view(k, j) - zrdat_view(k + 1, j)));
+          });
+
+      Kokkos::parallel_for(
+          "HYDRO_2D_Kokkos Kokkos_Lambda--BODY3",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg, jbeg}, {kend, jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+            zroutdat_view(k, j) = zrdat_view(k, j) + t * zudat_view(k, j);
+            zzoutdat_view(k, j) = zzdat_view(k, j) + t * zvdat_view(k, j);
+          });
+    }
+
+    Kokkos::fence();  // wait for the last launch before stopping the timer
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  // Copy every View back to its host array; first the 9 wrapped as inputs:
+  moveDataToHostFromKokkosView(zadat, zadat_view, kn, jn);
+  moveDataToHostFromKokkosView(zbdat, zbdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zmdat, zmdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zpdat, zpdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zqdat, zqdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zrdat, zrdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zudat, zudat_view, kn, jn);
+  moveDataToHostFromKokkosView(zvdat, zvdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zzdat, zzdat_view, kn, jn);
+
+  // ... then the 2 output Views:
+  moveDataToHostFromKokkosView(zroutdat, zroutdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zzoutdat, zzoutdat_view, kn, jn);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+#endif // RUN_KOKKOS
diff --git a/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp b/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp
new file mode 100644
index 000000000..451e6fe77
--- /dev/null
+++ b/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp
@@ -0,0 +1,78 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INT_PREDICT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include
+
+namespace rajaperf {
+namespace lcals {
+
+void INT_PREDICT::runKokkosVariant(VariantID vid,
+                                   size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INT_PREDICT_DATA_SETUP;
+
+  // Wrap pointer in Kokkos View, and adjust indices
+  auto px_view = getViewFromPointer(px, iend * 13);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // Declare variables in INT_PREDICT.hpp
+      Real_type dm22 = m_dm22;
+      Real_type dm23 = m_dm23;
+      Real_type dm24 = m_dm24;
+      Real_type dm25 = m_dm25;
+      Real_type dm26 = m_dm26;
+      Real_type dm27 = m_dm27;
+      Real_type dm28 = m_dm28;
+
+      Kokkos::parallel_for(
+          "INT_PREDICT_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            px_view[i] =
+                dm28 * px_view[i + offset * 12] +
+                dm27 * px_view[i + offset * 11] +
+                dm26 * px_view[i + offset * 10] +
+                dm25 * px_view[i + offset * 9] +
+                dm24 * px_view[i + offset * 8] +
+                dm23 * px_view[i + offset * 7] +
+                dm22 * px_view[i + offset * 6] +
+                c0 * (px_view[i +
offset * 4] + px_view[i + offset * 5]) + + px_view[i + offset * 2]; + }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(px, px_view, iend * 13); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp b/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp new file mode 100644 index 000000000..b2c582790 --- /dev/null +++ b/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp @@ -0,0 +1,69 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PLANCKIAN.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include +#include + +namespace rajaperf { +namespace lcals { + +void PLANCKIAN::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + PLANCKIAN_DATA_SETUP; + + auto x_view = getViewFromPointer(x, iend); + auto y_view = getViewFromPointer(y, iend); + auto u_view = getViewFromPointer(u, iend); + auto v_view = getViewFromPointer(v, iend); + auto w_view = getViewFromPointer(w, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "PLANCKIAN_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + y_view[i] = u_view[i] / v_view[i]; + w_view[i] = x_view[i] / (exp(y_view[i]) - 1.0); + }); + } + + Kokkos::fence(); + stopTimer(); + + 
break; + } + + default: { + std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(x, x_view, iend); + moveDataToHostFromKokkosView(y, y_view, iend); + moveDataToHostFromKokkosView(u, u_view, iend); + moveDataToHostFromKokkosView(v, v_view, iend); + moveDataToHostFromKokkosView(w, w_view, iend); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp b/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp new file mode 100644 index 000000000..ac0943dd8 --- /dev/null +++ b/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp @@ -0,0 +1,64 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIDIAG_ELIM.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include + +namespace rajaperf { +namespace lcals { +void TRIDIAG_ELIM::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = m_N; + + TRIDIAG_ELIM_DATA_SETUP; + + // Wrap pointers in Kokkos Views + auto xout_view = getViewFromPointer(xout, iend); + auto xin_view = getViewFromPointer(xin, iend); + auto y_view = getViewFromPointer(y, iend); + auto z_view = getViewFromPointer(z, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "TRIDIAG_ELIM_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + xout_view[i] = z_view[i] * (y_view[i] - xin_view[i - 1]); + }); + } + 
Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(xout, xout_view, iend); + moveDataToHostFromKokkosView(xin, xin_view, iend); + moveDataToHostFromKokkosView(y, y_view, iend); + moveDataToHostFromKokkosView(z, z_view, iend); +} + +} // end namespace lcals +} // end namespace rajaperf +#endif // RUN_KOKKOS diff --git a/src/lcals/CMakeLists.txt b/src/lcals/CMakeLists.txt index 5f88c8c69..f767bbd0b 100644 --- a/src/lcals/CMakeLists.txt +++ b/src/lcals/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. # diff --git a/src/lcals/DIFF_PREDICT-Cuda.cpp b/src/lcals/DIFF_PREDICT-Cuda.cpp index 0ef286507..fe7f0a187 100644 --- a/src/lcals/DIFF_PREDICT-Cuda.cpp +++ b/src/lcals/DIFF_PREDICT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/DIFF_PREDICT-Hip.cpp b/src/lcals/DIFF_PREDICT-Hip.cpp index 4f076157d..04b137e8e 100644 --- a/src/lcals/DIFF_PREDICT-Hip.cpp +++ b/src/lcals/DIFF_PREDICT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/DIFF_PREDICT-OMP.cpp b/src/lcals/DIFF_PREDICT-OMP.cpp index e83c208bd..09da23262 100644 --- a/src/lcals/DIFF_PREDICT-OMP.cpp +++ b/src/lcals/DIFF_PREDICT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/DIFF_PREDICT-OMPTarget.cpp b/src/lcals/DIFF_PREDICT-OMPTarget.cpp index 44e78452f..206624de6 100644 --- a/src/lcals/DIFF_PREDICT-OMPTarget.cpp +++ b/src/lcals/DIFF_PREDICT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/DIFF_PREDICT-Seq.cpp b/src/lcals/DIFF_PREDICT-Seq.cpp index bff82a6eb..81f35a4fb 100644 --- a/src/lcals/DIFF_PREDICT-Seq.cpp +++ b/src/lcals/DIFF_PREDICT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index 338ba7d0d..785aef275 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -49,6 +49,8 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DIFF_PREDICT::~DIFF_PREDICT() diff --git a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp index 130071412..3a583381b 100644 --- a/src/lcals/DIFF_PREDICT.hpp +++ b/src/lcals/DIFF_PREDICT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -93,6 +93,7 @@ class DIFF_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/EOS-Cuda.cpp b/src/lcals/EOS-Cuda.cpp index f99828d46..3b7f6213d 100644 --- a/src/lcals/EOS-Cuda.cpp +++ b/src/lcals/EOS-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/EOS-Hip.cpp b/src/lcals/EOS-Hip.cpp index 0912ce5ce..de71db663 100644 --- a/src/lcals/EOS-Hip.cpp +++ b/src/lcals/EOS-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/EOS-OMP.cpp b/src/lcals/EOS-OMP.cpp index 4a9688f03..7ac9cdb8f 100644 --- a/src/lcals/EOS-OMP.cpp +++ b/src/lcals/EOS-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/EOS-OMPTarget.cpp b/src/lcals/EOS-OMPTarget.cpp index 6cc2f832b..fa58c0fee 100644 --- a/src/lcals/EOS-OMPTarget.cpp +++ b/src/lcals/EOS-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/EOS-Seq.cpp b/src/lcals/EOS-Seq.cpp index 3aaeabdde..52637ff3b 100644 --- a/src/lcals/EOS-Seq.cpp +++ b/src/lcals/EOS-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index 27bc43d06..db9e86a75 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -57,6 +57,8 @@ EOS::EOS(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } EOS::~EOS() diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp index f2d38b5e9..9cc202a02 100644 --- a/src/lcals/EOS.hpp +++ b/src/lcals/EOS.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -62,6 +62,7 @@ class EOS : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_DIFF-Cuda.cpp b/src/lcals/FIRST_DIFF-Cuda.cpp index b195c0d46..0acdec349 100644 --- a/src/lcals/FIRST_DIFF-Cuda.cpp +++ b/src/lcals/FIRST_DIFF-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_DIFF-Hip.cpp b/src/lcals/FIRST_DIFF-Hip.cpp index 382fa107e..76ce3b626 100644 --- a/src/lcals/FIRST_DIFF-Hip.cpp +++ b/src/lcals/FIRST_DIFF-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_DIFF-OMP.cpp b/src/lcals/FIRST_DIFF-OMP.cpp index 73c945f3a..a3b814124 100644 --- a/src/lcals/FIRST_DIFF-OMP.cpp +++ b/src/lcals/FIRST_DIFF-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_DIFF-OMPTarget.cpp b/src/lcals/FIRST_DIFF-OMPTarget.cpp index 13c9a9888..d9cc0e350 100644 --- a/src/lcals/FIRST_DIFF-OMPTarget.cpp +++ b/src/lcals/FIRST_DIFF-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_DIFF-Seq.cpp b/src/lcals/FIRST_DIFF-Seq.cpp index 41837ff90..13513b2a3 100644 --- a/src/lcals/FIRST_DIFF-Seq.cpp +++ b/src/lcals/FIRST_DIFF-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 9272b20d4..9d60d68bb 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -53,6 +53,8 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp index 51de73049..f3f6424f0 100644 --- a/src/lcals/FIRST_DIFF.hpp +++ b/src/lcals/FIRST_DIFF.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -52,6 +52,7 @@ class FIRST_DIFF : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_MIN-Cuda.cpp b/src/lcals/FIRST_MIN-Cuda.cpp index f98982860..f32e20888 100644 --- a/src/lcals/FIRST_MIN-Cuda.cpp +++ b/src/lcals/FIRST_MIN-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -27,6 +27,7 @@ namespace lcals #define FIRST_MIN_DATA_TEARDOWN_CUDA \ deallocCudaDeviceData(x); + template < size_t block_size > __launch_bounds__(block_size) __global__ void first_min(Real_ptr x, @@ -37,7 +38,7 @@ __global__ void first_min(Real_ptr x, Index_type i = blockIdx.x * block_size + threadIdx.x; - minloc[ threadIdx.x ] = *dminloc; + minloc[ threadIdx.x ] = dminloc[blockIdx.x]; for ( ; i < iend ; i += gridDim.x * block_size ) { MyMinLoc& mymin = minloc[ threadIdx.x ]; @@ -55,8 +56,8 @@ __global__ void first_min(Real_ptr x, } if ( threadIdx.x == 0 ) { - if ( minloc[ 0 ].val < (*dminloc).val ) { - *dminloc = minloc[ 0 ]; + if ( minloc[ 0 ].val < (dminloc[blockIdx.x]).val ) { + dminloc[blockIdx.x] = minloc[ 0 ]; } } } @@ -75,32 +76,39 @@ void FIRST_MIN::runCudaVariantImpl(VariantID vid) FIRST_MIN_DATA_SETUP_CUDA; - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + MyMinLoc* mymin_block = new MyMinLoc[grid_size]; //per-block min value - FIRST_MIN_MINLOC_INIT; + MyMinLoc* dminloc; + cudaErrchk( 
cudaMalloc( (void**)&dminloc, + grid_size * sizeof(MyMinLoc) ) ); - MyMinLoc* dminloc; - cudaErrchk( cudaMalloc( (void**)&dminloc, sizeof(MyMinLoc) ) ); - cudaErrchk( cudaMemcpy( dminloc, &mymin, sizeof(MyMinLoc), - cudaMemcpyHostToDevice ) ); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); - first_min<<>>( x, - dminloc, - iend ); - cudaErrchk( cudaGetLastError() ); + FIRST_MIN_MINLOC_INIT; - cudaErrchk( cudaMemcpy( &mymin, dminloc, sizeof(MyMinLoc), - cudaMemcpyDeviceToHost ) ); - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + first_min<<>>(x, dminloc, iend); + + cudaErrchk( cudaGetLastError() ); + cudaErrchk( cudaMemcpy( mymin_block, dminloc, + grid_size * sizeof(MyMinLoc), + cudaMemcpyDeviceToHost ) ); - cudaErrchk( cudaFree( dminloc ) ); + for (Index_type i = 0; i < static_cast(grid_size); i++) { + if ( mymin_block[i].val < mymin.val ) { + mymin = mymin_block[i]; + } + } + m_minloc = RAJA_MAX(m_minloc, mymin.loc); } stopTimer(); + cudaErrchk( cudaFree( dminloc ) ); + delete[] mymin_block; + FIRST_MIN_DATA_TEARDOWN_CUDA; } else if ( vid == RAJA_CUDA ) { @@ -118,7 +126,7 @@ void FIRST_MIN::runCudaVariantImpl(VariantID vid) FIRST_MIN_BODY_RAJA; }); - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + m_minloc = loc.getLoc(); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Hip.cpp b/src/lcals/FIRST_MIN-Hip.cpp index e2b2763cf..debbf6cd5 100644 --- a/src/lcals/FIRST_MIN-Hip.cpp +++ b/src/lcals/FIRST_MIN-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -27,6 +27,7 @@ namespace lcals #define FIRST_MIN_DATA_TEARDOWN_HIP \ deallocHipDeviceData(x); + template < size_t block_size > __launch_bounds__(block_size) __global__ void first_min(Real_ptr x, @@ -37,7 +38,7 @@ __global__ void first_min(Real_ptr x, Index_type i = blockIdx.x * block_size + threadIdx.x; - minloc[ threadIdx.x ] = *dminloc; + minloc[ threadIdx.x ] = dminloc[blockIdx.x]; for ( ; i < iend ; i += gridDim.x * block_size ) { MyMinLoc& mymin = minloc[ threadIdx.x ]; @@ -55,8 +56,8 @@ __global__ void first_min(Real_ptr x, } if ( threadIdx.x == 0 ) { - if ( minloc[ 0 ].val < (*dminloc).val ) { - *dminloc = minloc[ 0 ]; + if ( minloc[ 0 ].val < (dminloc[blockIdx.x]).val ) { + dminloc[blockIdx.x] = minloc[ 0 ]; } } } @@ -75,32 +76,41 @@ void FIRST_MIN::runHipVariantImpl(VariantID vid) FIRST_MIN_DATA_SETUP_HIP; + const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); + MyMinLoc* mymin_block = new MyMinLoc[grid_size]; //per-block min value + + MyMinLoc* dminloc; + hipErrchk( hipMalloc( (void**)&dminloc, + grid_size * sizeof(MyMinLoc) ) ); + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { FIRST_MIN_MINLOC_INIT; - MyMinLoc* dminloc; - hipErrchk( hipMalloc( (void**)&dminloc, sizeof(MyMinLoc) ) ); - hipErrchk( hipMemcpy( dminloc, &mymin, sizeof(MyMinLoc), - hipMemcpyHostToDevice ) ); + hipLaunchKernelGGL( (first_min), grid_size, block_size, + sizeof(MyMinLoc)*block_size, 0, x, + dminloc, + iend ); - const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size); - hipLaunchKernelGGL((first_min), grid_size, block_size, - sizeof(MyMinLoc)*block_size, 0, x, - dminloc, - iend ); - hipErrchk( hipGetLastError() ); + hipErrchk( hipGetLastError() ); + hipErrchk( hipMemcpy( mymin_block, dminloc, + grid_size * sizeof(MyMinLoc), + hipMemcpyDeviceToHost ) ); - hipErrchk( hipMemcpy( &mymin, dminloc, sizeof(MyMinLoc), - hipMemcpyDeviceToHost ) ); - m_minloc = RAJA_MAX(m_minloc, mymin.loc); - - hipErrchk( hipFree( dminloc ) ); + for 
(Index_type i = 0; i < static_cast(grid_size); i++) { + if ( mymin_block[i].val < mymin.val ) { + mymin = mymin_block[i]; + } + } + m_minloc = mymin.loc; } stopTimer(); + hipErrchk( hipFree( dminloc ) ); + delete[] mymin_block; + FIRST_MIN_DATA_TEARDOWN_HIP; } else if ( vid == RAJA_HIP ) { @@ -118,7 +128,7 @@ void FIRST_MIN::runHipVariantImpl(VariantID vid) FIRST_MIN_BODY_RAJA; }); - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + m_minloc = loc.getLoc(); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index ef7791739..c45470dc0 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -17,7 +17,6 @@ namespace rajaperf namespace lcals { -FIRST_MIN_MINLOC_COMPARE; void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { @@ -46,7 +45,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ FIRST_MIN_BODY; } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = mymin.loc; } stopTimer(); @@ -76,7 +75,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = mymin.loc; } stopTimer(); @@ -97,7 +96,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ FIRST_MIN_BODY_RAJA; }); - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + m_minloc = loc.getLoc(); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMPTarget.cpp b/src/lcals/FIRST_MIN-OMPTarget.cpp index 52472c588..5a4e62c53 100644 --- a/src/lcals/FIRST_MIN-OMPTarget.cpp +++ b/src/lcals/FIRST_MIN-OMPTarget.cpp @@ -1,5 +1,5 @@ 
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -35,7 +35,6 @@ namespace lcals #define FIRST_MIN_DATA_TEARDOWN_OMP_TARGET \ deallocOpenMPDeviceData(x, did); -FIRST_MIN_MINLOC_COMPARE; void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { @@ -64,7 +63,7 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG FIRST_MIN_BODY; } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = mymin.loc; } stopTimer(); @@ -86,7 +85,7 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG FIRST_MIN_BODY_RAJA; }); - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + m_minloc = loc.getLoc(); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Seq.cpp b/src/lcals/FIRST_MIN-Seq.cpp index 7bb311675..6adfd4c40 100644 --- a/src/lcals/FIRST_MIN-Seq.cpp +++ b/src/lcals/FIRST_MIN-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -39,7 +39,7 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx FIRST_MIN_BODY; } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = mymin.loc; } stopTimer(); @@ -66,7 +66,7 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx } } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = mymin.loc; } stopTimer(); @@ -87,7 +87,7 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx FIRST_MIN_BODY_RAJA; }); - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + m_minloc = loc.getLoc(); } stopTimer(); diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index 8fe9a8c93..9858f91d3 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -57,6 +57,8 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index c10839ec7..dd00d4392 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -43,17 +43,15 @@ struct MyMinLoc { rajaperf::Index_type loc; }; -#define FIRST_MIN_MINLOC_COMPARE \ -MyMinLoc MinLoc_compare(MyMinLoc a, MyMinLoc b) { \ - return a.val < b.val ? 
a : b ; \ -} #define FIRST_MIN_MINLOC_INIT \ MyMinLoc mymin; \ mymin.val = m_xmin_init; \ mymin.loc = m_initloc; - +inline MyMinLoc MinLoc_compare(MyMinLoc a, MyMinLoc b) { + return a.val < b.val ? a : b ; +} #include "common/KernelBase.hpp" @@ -81,6 +79,7 @@ class FIRST_MIN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_SUM-Cuda.cpp b/src/lcals/FIRST_SUM-Cuda.cpp index 85db3d39d..fbe876576 100644 --- a/src/lcals/FIRST_SUM-Cuda.cpp +++ b/src/lcals/FIRST_SUM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_SUM-Hip.cpp b/src/lcals/FIRST_SUM-Hip.cpp index 1a03619e5..0752f1f3a 100644 --- a/src/lcals/FIRST_SUM-Hip.cpp +++ b/src/lcals/FIRST_SUM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/FIRST_SUM-OMP.cpp b/src/lcals/FIRST_SUM-OMP.cpp index 58d1a1070..e545538fc 100644 --- a/src/lcals/FIRST_SUM-OMP.cpp +++ b/src/lcals/FIRST_SUM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_SUM-OMPTarget.cpp b/src/lcals/FIRST_SUM-OMPTarget.cpp index afc53dd6c..7275590c7 100644 --- a/src/lcals/FIRST_SUM-OMPTarget.cpp +++ b/src/lcals/FIRST_SUM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_SUM-Seq.cpp b/src/lcals/FIRST_SUM-Seq.cpp index 29417f4c1..fb1be5ac2 100644 --- a/src/lcals/FIRST_SUM-Seq.cpp +++ b/src/lcals/FIRST_SUM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index a9d135446..bd652bd76 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,8 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp index 5f019c08c..59c1c0bfd 100644 --- a/src/lcals/FIRST_SUM.hpp +++ b/src/lcals/FIRST_SUM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -55,6 +55,7 @@ class FIRST_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/GEN_LIN_RECUR-Cuda.cpp b/src/lcals/GEN_LIN_RECUR-Cuda.cpp index 76f840294..b98ffe3d6 100644 --- a/src/lcals/GEN_LIN_RECUR-Cuda.cpp +++ b/src/lcals/GEN_LIN_RECUR-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/GEN_LIN_RECUR-Hip.cpp b/src/lcals/GEN_LIN_RECUR-Hip.cpp index 65fef4e8b..165cb4159 100644 --- a/src/lcals/GEN_LIN_RECUR-Hip.cpp +++ b/src/lcals/GEN_LIN_RECUR-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/GEN_LIN_RECUR-OMP.cpp b/src/lcals/GEN_LIN_RECUR-OMP.cpp index 3d40a9e47..660d47273 100644 --- a/src/lcals/GEN_LIN_RECUR-OMP.cpp +++ b/src/lcals/GEN_LIN_RECUR-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/GEN_LIN_RECUR-OMPTarget.cpp b/src/lcals/GEN_LIN_RECUR-OMPTarget.cpp index 1949698fd..cf5214728 100644 --- a/src/lcals/GEN_LIN_RECUR-OMPTarget.cpp +++ b/src/lcals/GEN_LIN_RECUR-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/GEN_LIN_RECUR-Seq.cpp b/src/lcals/GEN_LIN_RECUR-Seq.cpp index efde12463..f4f77bac6 100644 --- a/src/lcals/GEN_LIN_RECUR-Seq.cpp +++ b/src/lcals/GEN_LIN_RECUR-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index b0598aa8e..e8985d77a 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -57,6 +57,8 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp index d6d20b43b..9586a69b4 100644 --- a/src/lcals/GEN_LIN_RECUR.hpp +++ b/src/lcals/GEN_LIN_RECUR.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -76,6 +76,7 @@ class GEN_LIN_RECUR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_1D-Cuda.cpp b/src/lcals/HYDRO_1D-Cuda.cpp index 901ca786b..669828618 100644 --- a/src/lcals/HYDRO_1D-Cuda.cpp +++ b/src/lcals/HYDRO_1D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_1D-Hip.cpp b/src/lcals/HYDRO_1D-Hip.cpp index d39ec0f7e..b025a9f4e 100644 --- a/src/lcals/HYDRO_1D-Hip.cpp +++ b/src/lcals/HYDRO_1D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_1D-OMP.cpp b/src/lcals/HYDRO_1D-OMP.cpp index 29ea4db01..f2088205a 100644 --- a/src/lcals/HYDRO_1D-OMP.cpp +++ b/src/lcals/HYDRO_1D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/HYDRO_1D-OMPTarget.cpp b/src/lcals/HYDRO_1D-OMPTarget.cpp index d154b473f..0a15ca887 100644 --- a/src/lcals/HYDRO_1D-OMPTarget.cpp +++ b/src/lcals/HYDRO_1D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_1D-Seq.cpp b/src/lcals/HYDRO_1D-Seq.cpp index 2833cf6bc..168d02656 100644 --- a/src/lcals/HYDRO_1D-Seq.cpp +++ b/src/lcals/HYDRO_1D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 5ce1d0700..ef8f12eb2 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -56,6 +56,8 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp index 692e40a8e..dd61f112c 100644 --- a/src/lcals/HYDRO_1D.hpp +++ b/src/lcals/HYDRO_1D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -57,6 +57,7 @@ class HYDRO_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_2D-Cuda.cpp b/src/lcals/HYDRO_2D-Cuda.cpp index 21c320a60..d44f94315 100644 --- a/src/lcals/HYDRO_2D-Cuda.cpp +++ b/src/lcals/HYDRO_2D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/HYDRO_2D-Hip.cpp b/src/lcals/HYDRO_2D-Hip.cpp index 3180c5c10..8b295b159 100644 --- a/src/lcals/HYDRO_2D-Hip.cpp +++ b/src/lcals/HYDRO_2D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_2D-OMP.cpp b/src/lcals/HYDRO_2D-OMP.cpp index 532ee258c..bd0023035 100644 --- a/src/lcals/HYDRO_2D-OMP.cpp +++ b/src/lcals/HYDRO_2D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_2D-OMPTarget.cpp b/src/lcals/HYDRO_2D-OMPTarget.cpp index 18e6ff004..3897ab634 100644 --- a/src/lcals/HYDRO_2D-OMPTarget.cpp +++ b/src/lcals/HYDRO_2D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_2D-Seq.cpp b/src/lcals/HYDRO_2D-Seq.cpp index 3db534ffc..1f67b2aa7 100644 --- a/src/lcals/HYDRO_2D-Seq.cpp +++ b/src/lcals/HYDRO_2D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 9b6c2a643..c23d1e8c6 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -71,6 +71,8 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index 4363ea633..e735abde5 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -153,6 +153,7 @@ class HYDRO_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/INT_PREDICT-Cuda.cpp b/src/lcals/INT_PREDICT-Cuda.cpp index aaed2219e..306f10d58 100644 --- a/src/lcals/INT_PREDICT-Cuda.cpp +++ b/src/lcals/INT_PREDICT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/INT_PREDICT-Hip.cpp b/src/lcals/INT_PREDICT-Hip.cpp index 22914bff3..b7e0b5c50 100644 --- a/src/lcals/INT_PREDICT-Hip.cpp +++ b/src/lcals/INT_PREDICT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/INT_PREDICT-OMP.cpp b/src/lcals/INT_PREDICT-OMP.cpp index 4e67db9c4..29b167881 100644 --- a/src/lcals/INT_PREDICT-OMP.cpp +++ b/src/lcals/INT_PREDICT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/INT_PREDICT-OMPTarget.cpp b/src/lcals/INT_PREDICT-OMPTarget.cpp index 86b8b8169..f166b971f 100644 --- a/src/lcals/INT_PREDICT-OMPTarget.cpp +++ b/src/lcals/INT_PREDICT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/INT_PREDICT-Seq.cpp b/src/lcals/INT_PREDICT-Seq.cpp index 83a41071b..ce202a596 100644 --- a/src/lcals/INT_PREDICT-Seq.cpp +++ b/src/lcals/INT_PREDICT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index c2062fffa..e2d8c93c2 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -49,6 +49,8 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp index 7a3c6fda6..a81ae6fb2 100644 --- a/src/lcals/INT_PREDICT.hpp +++ b/src/lcals/INT_PREDICT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -72,6 +72,7 @@ class INT_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/PLANCKIAN-Cuda.cpp b/src/lcals/PLANCKIAN-Cuda.cpp index c831aab2e..a751c5a69 100644 --- a/src/lcals/PLANCKIAN-Cuda.cpp +++ b/src/lcals/PLANCKIAN-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/PLANCKIAN-Hip.cpp b/src/lcals/PLANCKIAN-Hip.cpp index 1b8c6050b..55d7e1f6f 100644 --- a/src/lcals/PLANCKIAN-Hip.cpp +++ b/src/lcals/PLANCKIAN-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/PLANCKIAN-OMP.cpp b/src/lcals/PLANCKIAN-OMP.cpp index e802a96fd..cc90067eb 100644 --- a/src/lcals/PLANCKIAN-OMP.cpp +++ b/src/lcals/PLANCKIAN-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/PLANCKIAN-OMPTarget.cpp b/src/lcals/PLANCKIAN-OMPTarget.cpp index 02858604f..04dd16e9d 100644 --- a/src/lcals/PLANCKIAN-OMPTarget.cpp +++ b/src/lcals/PLANCKIAN-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/PLANCKIAN-Seq.cpp b/src/lcals/PLANCKIAN-Seq.cpp index efd372444..04d3e4536 100644 --- a/src/lcals/PLANCKIAN-Seq.cpp +++ b/src/lcals/PLANCKIAN-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 59de57231..fd97ecf3e 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -49,6 +49,8 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/PLANCKIAN.hpp b/src/lcals/PLANCKIAN.hpp index 46fba63db..92b55fc95 100644 --- a/src/lcals/PLANCKIAN.hpp +++ b/src/lcals/PLANCKIAN.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -57,6 +57,7 @@ class PLANCKIAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/TRIDIAG_ELIM-Cuda.cpp b/src/lcals/TRIDIAG_ELIM-Cuda.cpp index 654d027a9..fde94b97d 100644 --- a/src/lcals/TRIDIAG_ELIM-Cuda.cpp +++ b/src/lcals/TRIDIAG_ELIM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/TRIDIAG_ELIM-Hip.cpp b/src/lcals/TRIDIAG_ELIM-Hip.cpp index dab19cc07..c35b37eb6 100644 --- a/src/lcals/TRIDIAG_ELIM-Hip.cpp +++ b/src/lcals/TRIDIAG_ELIM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/TRIDIAG_ELIM-OMP.cpp b/src/lcals/TRIDIAG_ELIM-OMP.cpp index a78c4a210..8f31c9493 100644 --- a/src/lcals/TRIDIAG_ELIM-OMP.cpp +++ b/src/lcals/TRIDIAG_ELIM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/lcals/TRIDIAG_ELIM-OMPTarget.cpp b/src/lcals/TRIDIAG_ELIM-OMPTarget.cpp index ff21303da..7cf12fd3f 100644 --- a/src/lcals/TRIDIAG_ELIM-OMPTarget.cpp +++ b/src/lcals/TRIDIAG_ELIM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/TRIDIAG_ELIM-Seq.cpp b/src/lcals/TRIDIAG_ELIM-Seq.cpp index 8aa6dc451..97d0a7cb0 100644 --- a/src/lcals/TRIDIAG_ELIM-Seq.cpp +++ b/src/lcals/TRIDIAG_ELIM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 05d0100a8..369b3d864 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -51,6 +51,8 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp index f593985a5..c95685de9 100644 --- a/src/lcals/TRIDIAG_ELIM.hpp +++ b/src/lcals/TRIDIAG_ELIM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -57,6 +57,7 @@ class TRIDIAG_ELIM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/CMakeLists.txt b/src/polybench/CMakeLists.txt index 5805926f3..f9cd2c1c2 100644 --- a/src/polybench/CMakeLists.txt +++ b/src/polybench/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. 
# diff --git a/src/polybench/POLYBENCH_2MM-Cuda.cpp b/src/polybench/POLYBENCH_2MM-Cuda.cpp index 40b1f5ca3..917fab091 100644 --- a/src/polybench/POLYBENCH_2MM-Cuda.cpp +++ b/src/polybench/POLYBENCH_2MM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_2MM-Hip.cpp b/src/polybench/POLYBENCH_2MM-Hip.cpp index 15ffa80df..6736bc47a 100644 --- a/src/polybench/POLYBENCH_2MM-Hip.cpp +++ b/src/polybench/POLYBENCH_2MM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_2MM-OMP.cpp b/src/polybench/POLYBENCH_2MM-OMP.cpp index 687f93c45..3ae78a2b7 100644 --- a/src/polybench/POLYBENCH_2MM-OMP.cpp +++ b/src/polybench/POLYBENCH_2MM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_2MM-OMPTarget.cpp b/src/polybench/POLYBENCH_2MM-OMPTarget.cpp index ab7860935..a5010753e 100644 --- a/src/polybench/POLYBENCH_2MM-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_2MM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_2MM-Seq.cpp b/src/polybench/POLYBENCH_2MM-Seq.cpp index 6e59576b1..32b40fcd1 100644 --- a/src/polybench/POLYBENCH_2MM-Seq.cpp +++ b/src/polybench/POLYBENCH_2MM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 03119a863..e755b09f9 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_2MM.hpp b/src/polybench/POLYBENCH_2MM.hpp index 0624257f7..e11d4889b 100644 --- a/src/polybench/POLYBENCH_2MM.hpp +++ b/src/polybench/POLYBENCH_2MM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_3MM-Cuda.cpp b/src/polybench/POLYBENCH_3MM-Cuda.cpp index f9b151ebf..25142d722 100644 --- a/src/polybench/POLYBENCH_3MM-Cuda.cpp +++ b/src/polybench/POLYBENCH_3MM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_3MM-Hip.cpp b/src/polybench/POLYBENCH_3MM-Hip.cpp index 4199f0c44..a87c66a2a 100644 --- a/src/polybench/POLYBENCH_3MM-Hip.cpp +++ b/src/polybench/POLYBENCH_3MM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_3MM-OMP.cpp b/src/polybench/POLYBENCH_3MM-OMP.cpp index a45f4dd28..0da458ed1 100644 --- a/src/polybench/POLYBENCH_3MM-OMP.cpp +++ b/src/polybench/POLYBENCH_3MM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_3MM-OMPTarget.cpp b/src/polybench/POLYBENCH_3MM-OMPTarget.cpp index 21c1ce7fa..a760a590a 100644 --- a/src/polybench/POLYBENCH_3MM-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_3MM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_3MM-Seq.cpp b/src/polybench/POLYBENCH_3MM-Seq.cpp index c1ca8c56d..fb1bdd03b 100644 --- a/src/polybench/POLYBENCH_3MM-Seq.cpp +++ b/src/polybench/POLYBENCH_3MM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 75990394c..66062f9c2 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp index 0cf9aabff..4331e3930 100644 --- a/src/polybench/POLYBENCH_3MM.hpp +++ b/src/polybench/POLYBENCH_3MM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ADI-Cuda.cpp b/src/polybench/POLYBENCH_ADI-Cuda.cpp index a4f92f213..f9c706eae 100644 --- a/src/polybench/POLYBENCH_ADI-Cuda.cpp +++ b/src/polybench/POLYBENCH_ADI-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_ADI-Hip.cpp b/src/polybench/POLYBENCH_ADI-Hip.cpp index f87ec84f1..2fabae5a9 100644 --- a/src/polybench/POLYBENCH_ADI-Hip.cpp +++ b/src/polybench/POLYBENCH_ADI-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ADI-OMP.cpp b/src/polybench/POLYBENCH_ADI-OMP.cpp index a9409b182..cfe2e81bd 100644 --- a/src/polybench/POLYBENCH_ADI-OMP.cpp +++ b/src/polybench/POLYBENCH_ADI-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ADI-OMPTarget.cpp b/src/polybench/POLYBENCH_ADI-OMPTarget.cpp index a3cc71346..cb159522b 100644 --- a/src/polybench/POLYBENCH_ADI-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_ADI-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_ADI-Seq.cpp b/src/polybench/POLYBENCH_ADI-Seq.cpp index 854a0fdf9..e99b99451 100644 --- a/src/polybench/POLYBENCH_ADI-Seq.cpp +++ b/src/polybench/POLYBENCH_ADI-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 7d0844e69..e3fe802d7 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp index 7cd579964..848fb9dc4 100644 --- a/src/polybench/POLYBENCH_ADI.hpp +++ b/src/polybench/POLYBENCH_ADI.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_ATAX-Cuda.cpp b/src/polybench/POLYBENCH_ATAX-Cuda.cpp index 66b0d3218..e7fc313ab 100644 --- a/src/polybench/POLYBENCH_ATAX-Cuda.cpp +++ b/src/polybench/POLYBENCH_ATAX-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ATAX-Hip.cpp b/src/polybench/POLYBENCH_ATAX-Hip.cpp index 8e1078c89..d5c593416 100644 --- a/src/polybench/POLYBENCH_ATAX-Hip.cpp +++ b/src/polybench/POLYBENCH_ATAX-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ATAX-OMP.cpp b/src/polybench/POLYBENCH_ATAX-OMP.cpp index 504a293a3..5f9d0027b 100644 --- a/src/polybench/POLYBENCH_ATAX-OMP.cpp +++ b/src/polybench/POLYBENCH_ATAX-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_ATAX-OMPTarget.cpp b/src/polybench/POLYBENCH_ATAX-OMPTarget.cpp index 1f9c23844..ed2f83ae7 100644 --- a/src/polybench/POLYBENCH_ATAX-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_ATAX-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ATAX-Seq.cpp b/src/polybench/POLYBENCH_ATAX-Seq.cpp index ecb98f3e8..41072d5fe 100644 --- a/src/polybench/POLYBENCH_ATAX-Seq.cpp +++ b/src/polybench/POLYBENCH_ATAX-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 44a805518..b8c79fa90 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp index 8f28a1470..f94ade140 100644 --- a/src/polybench/POLYBENCH_ATAX.hpp +++ b/src/polybench/POLYBENCH_ATAX.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FDTD_2D-Cuda.cpp b/src/polybench/POLYBENCH_FDTD_2D-Cuda.cpp index 6b4e8c636..e8e057267 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-Cuda.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FDTD_2D-Hip.cpp b/src/polybench/POLYBENCH_FDTD_2D-Hip.cpp index 0ca25f1e0..50aeec906 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-Hip.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_FDTD_2D-OMP.cpp b/src/polybench/POLYBENCH_FDTD_2D-OMP.cpp index dba8a872a..6d22fafd9 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-OMP.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FDTD_2D-OMPTarget.cpp b/src/polybench/POLYBENCH_FDTD_2D-OMPTarget.cpp index 5bb0d03b3..b8e51f834 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FDTD_2D-Seq.cpp b/src/polybench/POLYBENCH_FDTD_2D-Seq.cpp index 6ab94557d..fe495cc73 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-Seq.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index dce05e76a..863b19c15 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp index 7d3696293..e1d1b67c3 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.hpp +++ b/src/polybench/POLYBENCH_FDTD_2D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Cuda.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Cuda.cpp index 30e9a54b4..1c9e6cae9 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Cuda.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Hip.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Hip.cpp index 99b8ea303..0c71acc56 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Hip.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMP.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMP.cpp index edb2074f1..216e94854 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMP.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMPTarget.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMPTarget.cpp index 6c8a9d5fa..036220707 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp index b9f42b0ed..40aad73e3 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 1022ffe4f..794da77cf 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp index 283231d29..e8a067377 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GEMM-Cuda.cpp b/src/polybench/POLYBENCH_GEMM-Cuda.cpp index 5101ebc00..6b13ad29d 100644 --- a/src/polybench/POLYBENCH_GEMM-Cuda.cpp +++ b/src/polybench/POLYBENCH_GEMM-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMM-Hip.cpp b/src/polybench/POLYBENCH_GEMM-Hip.cpp index ed2c7fcff..69352ccec 100644 --- a/src/polybench/POLYBENCH_GEMM-Hip.cpp +++ b/src/polybench/POLYBENCH_GEMM-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMM-OMP.cpp b/src/polybench/POLYBENCH_GEMM-OMP.cpp index 53bddc30c..5fb1df691 100644 --- a/src/polybench/POLYBENCH_GEMM-OMP.cpp +++ b/src/polybench/POLYBENCH_GEMM-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GEMM-OMPTarget.cpp b/src/polybench/POLYBENCH_GEMM-OMPTarget.cpp index 7bbf5132b..6a6c9f25a 100644 --- a/src/polybench/POLYBENCH_GEMM-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_GEMM-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMM-Seq.cpp b/src/polybench/POLYBENCH_GEMM-Seq.cpp index 51a1f1127..ab7a52c23 100644 --- a/src/polybench/POLYBENCH_GEMM-Seq.cpp +++ b/src/polybench/POLYBENCH_GEMM-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index 0ee1f41be..4af192fc3 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp index ae218397d..33ea77997 100644 --- a/src/polybench/POLYBENCH_GEMM.hpp +++ b/src/polybench/POLYBENCH_GEMM.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMVER-Cuda.cpp b/src/polybench/POLYBENCH_GEMVER-Cuda.cpp index 652bbf761..67fafc421 100644 --- a/src/polybench/POLYBENCH_GEMVER-Cuda.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMVER-Hip.cpp b/src/polybench/POLYBENCH_GEMVER-Hip.cpp index 943958e31..cee6f5660 100644 --- a/src/polybench/POLYBENCH_GEMVER-Hip.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GEMVER-OMP.cpp b/src/polybench/POLYBENCH_GEMVER-OMP.cpp index 18013e3f7..483913eae 100644 --- a/src/polybench/POLYBENCH_GEMVER-OMP.cpp +++ b/src/polybench/POLYBENCH_GEMVER-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMVER-OMPTarget.cpp b/src/polybench/POLYBENCH_GEMVER-OMPTarget.cpp index c031bdf04..39b7b50c8 100644 --- a/src/polybench/POLYBENCH_GEMVER-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_GEMVER-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMVER-Seq.cpp b/src/polybench/POLYBENCH_GEMVER-Seq.cpp index eeee6f0ec..d17f9b709 100644 --- a/src/polybench/POLYBENCH_GEMVER-Seq.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 24a3f3d1b..106e40a29 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp index 80c96fa94..07ecae962 100644 --- a/src/polybench/POLYBENCH_GEMVER.hpp +++ b/src/polybench/POLYBENCH_GEMVER.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GESUMMV-Cuda.cpp b/src/polybench/POLYBENCH_GESUMMV-Cuda.cpp index 535e24efa..64ad1d875 100644 --- a/src/polybench/POLYBENCH_GESUMMV-Cuda.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GESUMMV-Hip.cpp b/src/polybench/POLYBENCH_GESUMMV-Hip.cpp index ee39f9c6e..c53cb333e 100644 --- a/src/polybench/POLYBENCH_GESUMMV-Hip.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GESUMMV-OMP.cpp b/src/polybench/POLYBENCH_GESUMMV-OMP.cpp index 830bb73bf..cffda3a4e 100644 --- a/src/polybench/POLYBENCH_GESUMMV-OMP.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GESUMMV-OMPTarget.cpp b/src/polybench/POLYBENCH_GESUMMV-OMPTarget.cpp index c4c535bf6..f25e85cfb 100644 --- a/src/polybench/POLYBENCH_GESUMMV-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp index c65897e5d..642d0b463 100644 --- a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index eb527af27..73d2e2ab4 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp index c8f71ee84..32a1b0eae 100644 --- a/src/polybench/POLYBENCH_GESUMMV.hpp +++ b/src/polybench/POLYBENCH_GESUMMV.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_HEAT_3D-Cuda.cpp b/src/polybench/POLYBENCH_HEAT_3D-Cuda.cpp index ce6e7769e..9dd8e3564 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Cuda.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_HEAT_3D-Hip.cpp b/src/polybench/POLYBENCH_HEAT_3D-Hip.cpp index 00e68aebd..1a6973832 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Hip.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_HEAT_3D-OMP.cpp b/src/polybench/POLYBENCH_HEAT_3D-OMP.cpp index 50ca323de..1586866c5 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-OMP.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_HEAT_3D-OMPTarget.cpp b/src/polybench/POLYBENCH_HEAT_3D-OMPTarget.cpp index 692689d85..112f07ee1 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp index 4afb06d21..7222e5934 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index 567192b9a..7f46f47a8 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp index 81ab06e0e..8d7eff93c 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.hpp +++ b/src/polybench/POLYBENCH_HEAT_3D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Cuda.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Cuda.cpp index a48e70a84..0363a0108 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Cuda.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Hip.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Hip.cpp index a5ff60dfc..c67a84a66 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Hip.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_JACOBI_1D-OMP.cpp b/src/polybench/POLYBENCH_JACOBI_1D-OMP.cpp index d813f9d17..e002b02fb 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-OMP.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_1D-OMPTarget.cpp b/src/polybench/POLYBENCH_JACOBI_1D-OMPTarget.cpp index 1ca122ebb..13b9b5375 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp index f23ccdf06..a7e81fe87 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index f86bb5956..12d2243bc 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp index cb3131490..035096f89 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_2D-Cuda.cpp b/src/polybench/POLYBENCH_JACOBI_2D-Cuda.cpp index ca6a485ec..f63d1ae25 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-Cuda.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_JACOBI_2D-Hip.cpp b/src/polybench/POLYBENCH_JACOBI_2D-Hip.cpp index bf03f9b86..22c2d7606 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-Hip.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_2D-OMP.cpp b/src/polybench/POLYBENCH_JACOBI_2D-OMP.cpp index 4acf70b25..7561f7ed8 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-OMP.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_2D-OMPTarget.cpp b/src/polybench/POLYBENCH_JACOBI_2D-OMPTarget.cpp index 9538d50f7..752115bce 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_JACOBI_2D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_2D-Seq.cpp index 856404f92..25f875d4b 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-Seq.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index 1b4f9378a..4ec1def5f 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp index a2ba63181..49ab2cd40 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_MVT-Cuda.cpp b/src/polybench/POLYBENCH_MVT-Cuda.cpp index 2795cadbb..3abeb804e 100644 --- a/src/polybench/POLYBENCH_MVT-Cuda.cpp +++ b/src/polybench/POLYBENCH_MVT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_MVT-Hip.cpp b/src/polybench/POLYBENCH_MVT-Hip.cpp index 176c41710..787505fec 100644 --- a/src/polybench/POLYBENCH_MVT-Hip.cpp +++ b/src/polybench/POLYBENCH_MVT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_MVT-OMP.cpp b/src/polybench/POLYBENCH_MVT-OMP.cpp index f5dad16b9..74d815e78 100644 --- a/src/polybench/POLYBENCH_MVT-OMP.cpp +++ b/src/polybench/POLYBENCH_MVT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_MVT-OMPTarget.cpp b/src/polybench/POLYBENCH_MVT-OMPTarget.cpp index acd7ad56a..724ec3348 100644 --- a/src/polybench/POLYBENCH_MVT-OMPTarget.cpp +++ b/src/polybench/POLYBENCH_MVT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_MVT-Seq.cpp b/src/polybench/POLYBENCH_MVT-Seq.cpp index 8d115b94e..93ead00c0 100644 --- a/src/polybench/POLYBENCH_MVT-Seq.cpp +++ b/src/polybench/POLYBENCH_MVT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 3354ca97d..f4f03c631 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp index dce40baf2..518d75dd8 100644 --- a/src/polybench/POLYBENCH_MVT.hpp +++ b/src/polybench/POLYBENCH_MVT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/rajaperf_config.hpp.in b/src/rajaperf_config.hpp.in index 43ae990a9..9f14594b5 100644 --- a/src/rajaperf_config.hpp.in +++ b/src/rajaperf_config.hpp.in @@ -9,7 +9,7 @@ */ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream-kokkos/ADD-Kokkos.cpp b/src/stream-kokkos/ADD-Kokkos.cpp new file mode 100644 index 000000000..51e5bdf81 --- /dev/null +++ b/src/stream-kokkos/ADD-Kokkos.cpp @@ -0,0 +1,63 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ADD.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include <iostream> + +namespace rajaperf { +namespace stream { + +void ADD::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + ADD_DATA_SETUP; + + // Instantiating views using getViewFromPointer + + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "ADD_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { c_view[i] = a_view[i] + b_view[i]; }); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n ADD : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(b, b_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); +} + +} // end namespace stream +} // end namespace rajaperf +#endif // (RUN_KOKKOS) diff --git a/src/stream-kokkos/CMakeLists.txt b/src/stream-kokkos/CMakeLists.txt new file mode 100644 index 000000000..4cd38bdf5 --- /dev/null +++ b/src/stream-kokkos/CMakeLists.txt @@ -0,0 +1,18 @@ +############################################################################### +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details.
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +blt_add_library( + NAME stream-kokkos + SOURCES ADD-Kokkos.cpp + COPY-Kokkos.cpp + DOT-Kokkos.cpp + MUL-Kokkos.cpp + TRIAD-Kokkos.cpp + INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../stream + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/stream-kokkos/COPY-Kokkos.cpp b/src/stream-kokkos/COPY-Kokkos.cpp new file mode 100644 index 000000000..d363cd944 --- /dev/null +++ b/src/stream-kokkos/COPY-Kokkos.cpp @@ -0,0 +1,58 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "COPY.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include <iostream> + +namespace rajaperf { +namespace stream { + +void COPY::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + COPY_DATA_SETUP; + + auto a_view = getViewFromPointer(a, iend); + auto c_view = getViewFromPointer(c, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "COPY_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { c_view[i] = a_view[i]; }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n COPY : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); +} + +} // end namespace stream +} // end
namespace rajaperf +#endif // (RUN_KOKKOS) diff --git a/src/stream-kokkos/DOT-Kokkos.cpp b/src/stream-kokkos/DOT-Kokkos.cpp new file mode 100644 index 000000000..ca6b0e304 --- /dev/null +++ b/src/stream-kokkos/DOT-Kokkos.cpp @@ -0,0 +1,67 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DOT.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include <iostream> + +namespace rajaperf { +namespace stream { + +void DOT::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DOT_DATA_SETUP; + + // Instantiation of pointer - wrapped Kokkos views: + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + + switch (vid) { + + case Kokkos_Lambda: { + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type dot = m_dot_init; + + parallel_reduce( + "DOT-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i, Real_type & dot_res) { + dot_res += a_view[i] * b_view[i]; + }, + dot); + m_dot += static_cast<Real_type>(dot); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n DOT : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(b, b_view, iend); +} + +} // end namespace stream +} // end namespace rajaperf +#endif // (RUN_KOKKOS) diff --git a/src/stream-kokkos/MUL-Kokkos.cpp b/src/stream-kokkos/MUL-Kokkos.cpp new file mode 100644 index 000000000..aa53b0d66 ---
/dev/null +++ b/src/stream-kokkos/MUL-Kokkos.cpp @@ -0,0 +1,59 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MUL.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include <iostream> + +namespace rajaperf { +namespace stream { + +void MUL::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MUL_DATA_SETUP; + + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "MUL_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { b_view[i] = alpha * c_view[i]; }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n MUL : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(b, b_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); +} + +} // end namespace stream +} // end namespace rajaperf +#endif // (RUN_KOKKOS) diff --git a/src/stream-kokkos/TRIAD-Kokkos.cpp b/src/stream-kokkos/TRIAD-Kokkos.cpp new file mode 100644 index 000000000..3b897a46a --- /dev/null +++ b/src/stream-kokkos/TRIAD-Kokkos.cpp @@ -0,0 +1,62 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIAD.hpp" +#if defined(RUN_KOKKOS) +#include "common/KokkosViewUtils.hpp" +#include <iostream> + +namespace rajaperf { +namespace stream { + +void TRIAD::runKokkosVariant(VariantID vid, + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + TRIAD_DATA_SETUP; + + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + + switch (vid) { + + case Kokkos_Lambda: { + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "TRIAD_Kokkos, Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + a_view[i] = b_view[i] + alpha * c_view[i]; + }); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(b, b_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); +} + +} // end namespace stream +} // end namespace rajaperf +#endif // (RUN_KOKKOS) diff --git a/src/stream/ADD-Cuda.cpp b/src/stream/ADD-Cuda.cpp index 102774a13..93ac53971 100644 --- a/src/stream/ADD-Cuda.cpp +++ b/src/stream/ADD-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details.
// diff --git a/src/stream/ADD-Hip.cpp b/src/stream/ADD-Hip.cpp index 5e53500c8..d6f2bdf44 100644 --- a/src/stream/ADD-Hip.cpp +++ b/src/stream/ADD-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/ADD-OMP.cpp b/src/stream/ADD-OMP.cpp index ae425a93f..ddd24eb30 100644 --- a/src/stream/ADD-OMP.cpp +++ b/src/stream/ADD-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/ADD-OMPTarget.cpp b/src/stream/ADD-OMPTarget.cpp index 2089472fa..33c12e5cc 100644 --- a/src/stream/ADD-OMPTarget.cpp +++ b/src/stream/ADD-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/ADD-Seq.cpp b/src/stream/ADD-Seq.cpp index f421d44c2..3b7db4445 100644 --- a/src/stream/ADD-Seq.cpp +++ b/src/stream/ADD-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 904c0804b..3d482c80f 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,8 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } ADD::~ADD() diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp index 07d0dea79..49e09a602 100644 --- a/src/stream/ADD.hpp +++ b/src/stream/ADD.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,7 @@ class ADD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/CMakeLists.txt b/src/stream/CMakeLists.txt index 2122b7867..03351ff5d 100644 --- a/src/stream/CMakeLists.txt +++ b/src/stream/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. 
# See the RAJAPerf/LICENSE file for details. # diff --git a/src/stream/COPY-Cuda.cpp b/src/stream/COPY-Cuda.cpp index cddf986ac..279c34027 100644 --- a/src/stream/COPY-Cuda.cpp +++ b/src/stream/COPY-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/COPY-Hip.cpp b/src/stream/COPY-Hip.cpp index fe302a7fc..0287d074c 100644 --- a/src/stream/COPY-Hip.cpp +++ b/src/stream/COPY-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/COPY-OMP.cpp b/src/stream/COPY-OMP.cpp index c1b38e25f..d9a0aa2a9 100644 --- a/src/stream/COPY-OMP.cpp +++ b/src/stream/COPY-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/COPY-OMPTarget.cpp b/src/stream/COPY-OMPTarget.cpp index 823a32b13..f4266bde7 100644 --- a/src/stream/COPY-OMPTarget.cpp +++ b/src/stream/COPY-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/COPY-Seq.cpp b/src/stream/COPY-Seq.cpp index a807c0bee..0ce95ae72 100644 --- a/src/stream/COPY-Seq.cpp +++ b/src/stream/COPY-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 251208a4d..dd447ad71 100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,8 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } COPY::~COPY() diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp index 0f23bfa68..0544e0d2f 100644 --- a/src/stream/COPY.hpp +++ b/src/stream/COPY.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -51,6 +51,7 @@ class COPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/DOT-Cuda.cpp b/src/stream/DOT-Cuda.cpp index de23c290b..b5825a6b7 100644 --- a/src/stream/DOT-Cuda.cpp +++ b/src/stream/DOT-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/DOT-Hip.cpp b/src/stream/DOT-Hip.cpp index 3e75e64ef..0956f3d81 100644 --- a/src/stream/DOT-Hip.cpp +++ b/src/stream/DOT-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/DOT-OMP.cpp b/src/stream/DOT-OMP.cpp index efd8e9ffa..6b7d67e0e 100644 --- a/src/stream/DOT-OMP.cpp +++ b/src/stream/DOT-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/stream/DOT-OMPTarget.cpp b/src/stream/DOT-OMPTarget.cpp index 27f96a5d1..ddbcf7b4d 100644 --- a/src/stream/DOT-OMPTarget.cpp +++ b/src/stream/DOT-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/DOT-Seq.cpp b/src/stream/DOT-Seq.cpp index cde2263de..d10a4abce 100644 --- a/src/stream/DOT-Seq.cpp +++ b/src/stream/DOT-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index 0d9657a8a..c3dba74f1 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -52,6 +52,8 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DOT::~DOT() diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp index 64d70c630..5912c120a 100644 --- a/src/stream/DOT.hpp +++ b/src/stream/DOT.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -51,6 +51,7 @@ class DOT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/MUL-Cuda.cpp b/src/stream/MUL-Cuda.cpp index 8db12d087..7cbbb083c 100644 --- a/src/stream/MUL-Cuda.cpp +++ b/src/stream/MUL-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/MUL-Hip.cpp b/src/stream/MUL-Hip.cpp index 3e5e3f9f0..69d7a4967 100644 --- a/src/stream/MUL-Hip.cpp +++ b/src/stream/MUL-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// diff --git a/src/stream/MUL-OMP.cpp b/src/stream/MUL-OMP.cpp index 7b78bf819..3369d0f3d 100644 --- a/src/stream/MUL-OMP.cpp +++ b/src/stream/MUL-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/MUL-OMPTarget.cpp b/src/stream/MUL-OMPTarget.cpp index 7e3141c78..ab02bbccc 100644 --- a/src/stream/MUL-OMPTarget.cpp +++ b/src/stream/MUL-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/MUL-Seq.cpp b/src/stream/MUL-Seq.cpp index 837d26147..dfd078b64 100644 --- a/src/stream/MUL-Seq.cpp +++ b/src/stream/MUL-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 55eced2b0..ff7598965 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. 
// @@ -52,6 +52,8 @@ MUL::MUL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } MUL::~MUL() diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp index 1e79e17f9..3db59092a 100644 --- a/src/stream/MUL.hpp +++ b/src/stream/MUL.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -52,6 +52,7 @@ class MUL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/TRIAD-Cuda.cpp b/src/stream/TRIAD-Cuda.cpp index 234683493..20a53ceda 100644 --- a/src/stream/TRIAD-Cuda.cpp +++ b/src/stream/TRIAD-Cuda.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/TRIAD-Hip.cpp b/src/stream/TRIAD-Hip.cpp index 740727530..d3734c536 100644 --- a/src/stream/TRIAD-Hip.cpp +++ b/src/stream/TRIAD-Hip.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/TRIAD-OMP.cpp b/src/stream/TRIAD-OMP.cpp index f1c5c435d..5d9832d95 100644 --- a/src/stream/TRIAD-OMP.cpp +++ b/src/stream/TRIAD-OMP.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/TRIAD-OMPTarget.cpp b/src/stream/TRIAD-OMPTarget.cpp index c69e6cdbb..5d63d1c24 100644 --- a/src/stream/TRIAD-OMPTarget.cpp +++ b/src/stream/TRIAD-OMPTarget.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/TRIAD-Seq.cpp b/src/stream/TRIAD-Seq.cpp index 0477202c0..f3e68603c 100644 --- a/src/stream/TRIAD-Seq.cpp +++ b/src/stream/TRIAD-Seq.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index 543b19642..34cd06a49 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. 
// See the RAJAPerf/LICENSE file for details. // @@ -56,6 +56,8 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } TRIAD::~TRIAD() diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index 80685ce3c..3f65bf804 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -53,6 +53,7 @@ class TRIAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fe0b732f5..001c81190 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +# Copyright (c) 2017-23, Lawrence Livermore National Security, LLC # and RAJA Performance Suite project contributors. # See the RAJAPerf/LICENSE file for details. 
# diff --git a/test/test-raja-perf-suite.cpp b/test/test-raja-perf-suite.cpp index 60dbd7a29..dcad95e52 100644 --- a/test/test-raja-perf-suite.cpp +++ b/test/test-raja-perf-suite.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // diff --git a/tpl/RAJA b/tpl/RAJA index 87a5cac67..c2a6b1740 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 87a5cac67214e5e96c941bd652b1c0981e9f2123 +Subproject commit c2a6b1740759ae3ae7c85b35e20dbffbe235355d diff --git a/tpl/kokkos b/tpl/kokkos new file mode 160000 index 000000000..2834f94af --- /dev/null +++ b/tpl/kokkos @@ -0,0 +1 @@ +Subproject commit 2834f94af9b01debf67c1aaa3f0eb0c903d72c8d