diff --git a/.github/workflows/sycl-linux-precommit.yml b/.github/workflows/sycl-linux-precommit.yml index 9d1825067a661..6d578580a13db 100644 --- a/.github/workflows/sycl-linux-precommit.yml +++ b/.github/workflows/sycl-linux-precommit.yml @@ -81,7 +81,7 @@ jobs: with: name: Build e2e tests runner: '["Linux", "build"]' - image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:latest + image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:alldeps image_options: -u 1001 ref: ${{ github.sha }} merge_ref: '' diff --git a/.github/workflows/sycl-rel-nightly.yml b/.github/workflows/sycl-rel-nightly.yml index 96c691451965b..ae33d157e2f7a 100644 --- a/.github/workflows/sycl-rel-nightly.yml +++ b/.github/workflows/sycl-rel-nightly.yml @@ -38,6 +38,7 @@ jobs: build_cache_root: "/__w/" build_artifact_suffix: default build_configure_extra_args: '--hip --cuda' + build_image: ghcr.io/intel/llvm/ubuntu2204_build:latest merge_ref: '' build_ref: sycl-rel-6_0_0 @@ -118,8 +119,8 @@ jobs: if: ${{ github.repository == 'intel/llvm' && needs.check_for_new_commits.outputs.is_new_commit != 'false' }} uses: ./.github/workflows/sycl-windows-build.yml with: + ref: sycl-rel-6_0_0 merge_ref: '' - build_ref: sycl-rel-6_0_0 # We upload both Linux/Windows build via Github's "Releases" # functionality, make sure Linux/Windows names follow the same pattern. 
@@ -139,6 +140,7 @@ jobs: sycl_toolchain_archive: ${{ needs.build-win.outputs.artifact_archive_name }} extra_lit_opts: --param gpu-intel-gen12=True ref: sycl-rel-6_0_0 + merge_ref: '' cuda-aws-start: needs: [ubuntu2204_build] @@ -156,7 +158,7 @@ jobs: with: name: CUDA E2E runner: '["aws_cuda-${{ github.run_id }}-${{ github.run_attempt }}"]' - image: ghcr.io/intel/llvm/ubuntu2204_build:latest-0300ac924620a51f76c4929794637b82790f12ab + image: ghcr.io/intel/llvm/ubuntu2204_build:latest image_options: -u 1001 --gpus all --cap-add SYS_ADMIN --env NVIDIA_DISABLE_REQUIRE=1 target_devices: cuda:gpu ref: sycl-rel-6_0_0 diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml index f7e13df7a97c7..de2d452de7a50 100644 --- a/.github/workflows/sycl-windows-build.yml +++ b/.github/workflows/sycl-windows-build.yml @@ -7,9 +7,6 @@ on: type: string required: false default: "default" - build_ref: - type: string - required: false build_configure_extra_args: type: string required: false @@ -18,6 +15,9 @@ on: description: 'Filter matches for the changed files in the PR' default: '[llvm, clang, sycl, llvm_spirv, xptifw, libclc, libdevice]' required: false + ref: + type: string + required: False merge_ref: description: | Commit-ish to merge post-checkout if non-empty. Must be reachable from @@ -105,7 +105,7 @@ jobs: - uses: ./devops/actions/cached_checkout with: path: src - ref: ${{ inputs.build_ref || github.sha }} + ref: ${{ inputs.ref || github.sha }} merge_ref: ${{ inputs.merge_ref }} cache_path: "D:\\\\github\\\\_work\\\\repo_cache\\\\" - name: Configure diff --git a/.github/workflows/sycl-windows-run-tests.yml b/.github/workflows/sycl-windows-run-tests.yml index a1e27f4fda1d0..dbd4d7ff439ed 100644 --- a/.github/workflows/sycl-windows-run-tests.yml +++ b/.github/workflows/sycl-windows-run-tests.yml @@ -18,6 +18,13 @@ on: ref: type: string required: False + merge_ref: + description: | + Commit-ish to merge post-checkout if non-empty. 
Must be reachable from + the default_branch input parameter. + type: string + default: 'FETCH_HEAD' + required: False sycl_toolchain_artifact: type: string @@ -68,7 +75,8 @@ jobs: - uses: ./devops/actions/cached_checkout with: path: llvm - ref: ${{ inputs.build_ref || github.sha }} + ref: ${{ inputs.ref || github.sha }} + merge_ref: ${{ inputs.merge_ref }} cache_path: "D:\\\\github\\\\_work\\\\repo_cache\\\\" - name: Download compiler toolchain uses: actions/download-artifact@v4 diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a25659459a5b4..10e3a4920fda7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10706,38 +10706,21 @@ static void getSPIRVBackendOpts(const llvm::opt::ArgList &TCArgs, ArgStringList &BackendArgs) { BackendArgs.push_back(TCArgs.MakeArgString("-filetype=obj")); BackendArgs.push_back( - TCArgs.MakeArgString("-mtriple=spirv64-unknown-unknown")); - // TODO: Optimization level is currently forced to -O0 due to some testing - // issues. Update optimization level after testing issues are resolved. - BackendArgs.push_back(TCArgs.MakeArgString("-O0")); + TCArgs.MakeArgString("-mtriple=spirv64v1.6-unknown-unknown")); BackendArgs.push_back( TCArgs.MakeArgString("--avoid-spirv-capabilities=Shader")); BackendArgs.push_back( TCArgs.MakeArgString("--translator-compatibility-mode")); - - // TODO: There is some overlap between the lists of extensions in SPIR-V - // backend and SPIR-V Trnaslator). We will try to combine them when SPIR-V - // backdn is ready.
- std::string ExtArg("--spirv-ext="); - std::string DefaultExtArg = - "+SPV_EXT_shader_atomic_float_add,+SPV_EXT_shader_atomic_float_min_max" - ",+SPV_KHR_no_integer_wrap_decoration,+SPV_KHR_float_controls" - ",+SPV_KHR_expect_assume,+SPV_KHR_linkonce_odr"; - std::string INTELExtArg = ",+SPV_INTEL_subgroups,+SPV_INTEL_function_pointers" - ",+SPV_INTEL_arbitrary_precision_integers" - ",+SPV_INTEL_variable_length_array"; - ExtArg = ExtArg + DefaultExtArg + INTELExtArg; - - // Other args - ExtArg += ",+SPV_INTEL_bfloat16_conversion" - ",+SPV_KHR_uniform_group_instructions" - ",+SPV_INTEL_optnone" - ",+SPV_KHR_subgroup_rotate" - ",+SPV_INTEL_usm_storage_classes" - ",+SPV_EXT_shader_atomic_float16_add" - ",+SPV_KHR_bit_instructions"; - - BackendArgs.push_back(TCArgs.MakeArgString(ExtArg)); + // TODO: A list of SPIR-V extensions that are supported by the SPIR-V backend + // is growing. Let's postpone the decision on which extensions to enable until + // - the list is stable, and + // - we decide on a mapping of user requested extensions into backend's ones. + // Meanwhile we enable all the SPIR-V backend extensions. + BackendArgs.push_back(TCArgs.MakeArgString("--spirv-ext=all")); + // TODO: + // - handle -Xspirv-translator option to avoid "argument unused during + // compilation" error + // - handle --spirv-ext=+ and --spirv-ext=- options } // Utility function to gather all llvm-spirv options. 
diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index fb81abd4b7ef0..8c88047e9e649 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -53,12 +53,12 @@ int main() { // CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" // CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" // CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] -// CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [772 x i8] +// CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" // CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }] -// CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 772], section ".tgtimg", align 16 +// CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = 
appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata" -// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([772 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 772), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] +// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] // CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null } // CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }] // CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }] diff --git a/clang/test/Driver/sycl-spirv-backend.cpp b/clang/test/Driver/sycl-spirv-backend.cpp index db159f6deafb5..7697c1055b3d2 100644 --- a/clang/test/Driver/sycl-spirv-backend.cpp +++ 
b/clang/test/Driver/sycl-spirv-backend.cpp @@ -3,4 +3,4 @@ /// // RUN: %clangxx -fsycl -fsycl-use-spirv-backend-for-spirv-gen -### %s 2>&1 | FileCheck %s -// CHECK: llc{{.*}} "-filetype=obj" "-mtriple=spirv64-unknown-unknown" "-O0" "--avoid-spirv-capabilities=Shader" "--translator-compatibility-mode" "--spirv-ext= +// CHECK: llc{{.*}} "-filetype=obj" "-mtriple=spirv64{{[^-]*}}-unknown-unknown" "--avoid-spirv-capabilities=Shader" "--translator-compatibility-mode" "--spirv-ext= diff --git a/devops/bandit.config b/devops/bandit.config index 49de9695a878b..4e501feef37ef 100644 --- a/devops/bandit.config +++ b/devops/bandit.config @@ -40,7 +40,6 @@ # B317 : xml_bad_sax # B318 : xml_bad_minidom # B319 : xml_bad_pulldom -# B320 : xml_bad_etree # B321 : ftplib # B323 : unverified_context # B324 : hashlib_new_insecure_functions @@ -53,7 +52,6 @@ # B407 : import_xml_expat # B408 : import_xml_minidom # B409 : import_xml_pulldom -# B410 : import_lxml # B411 : import_xmlrpclib # B412 : import_httpoxy # B413 : import_pycrypto @@ -83,7 +81,7 @@ # IPAS Required Checkers. 
Do not disable these # Additional checkers may be added if desired tests: - [ 'B301', 'B302', 'B303', 'B304', 'B305', 'B306', 'B308', 'B310', 'B311', 'B312', 'B313', 'B314', 'B315', 'B316', 'B317', 'B318', 'B319', 'B320', 'B321', 'B323', 'B324', 'B401', 'B402', 'B403', 'B404', 'B405', 'B406', 'B407', 'B408', 'B409', 'B410', 'B411', 'B412', 'B413'] + [ 'B301', 'B302', 'B303', 'B304', 'B305', 'B306', 'B308', 'B310', 'B311', 'B312', 'B313', 'B314', 'B315', 'B316', 'B317', 'B318', 'B319', 'B321', 'B323', 'B324', 'B401', 'B402', 'B403', 'B404', 'B405', 'B406', 'B407', 'B408', 'B409', 'B411', 'B412', 'B413'] # (optional) list skipped test IDs here, eg '[B101, B406]': # The following checkers are not required but be added to tests list if desired diff --git a/devops/containers/ubuntu2404_base.Dockerfile b/devops/containers/ubuntu2404_base.Dockerfile index 7af9ccfec1e5f..3cdad5b74366e 100644 --- a/devops/containers/ubuntu2404_base.Dockerfile +++ b/devops/containers/ubuntu2404_base.Dockerfile @@ -8,6 +8,13 @@ USER root COPY scripts/install_build_tools.sh /install.sh RUN /install.sh +# libzstd-dev installed by default on Ubuntu 24.04 is not compiled with -fPIC flag. +# This causes linking errors when building SYCL runtime. +# Bug: https://github.com/intel/llvm/issues/15935 +# Workaround: build zstd from sources with -fPIC flag. +COPY scripts/build_zstd_1_5_6_ub24.sh /build_zstd_1_5_6_ub24.sh +RUN /build_zstd_1_5_6_ub24.sh + COPY scripts/create-sycl-user.sh /user-setup.sh RUN /user-setup.sh diff --git a/devops/containers/ubuntu2404_build.Dockerfile b/devops/containers/ubuntu2404_build.Dockerfile index 5391030df006a..c659eabbced51 100644 --- a/devops/containers/ubuntu2404_build.Dockerfile +++ b/devops/containers/ubuntu2404_build.Dockerfile @@ -8,6 +8,13 @@ USER root COPY scripts/install_build_tools.sh /install.sh RUN /install.sh +# libzstd-dev installed by default on Ubuntu 24.04 is not compiled with -fPIC flag. +# This causes linking errors when building SYCL runtime. 
+# Bug: https://github.com/intel/llvm/issues/15935 +# Workaround: build zstd from sources with -fPIC flag. +COPY scripts/build_zstd_1_5_6_ub24.sh /build_zstd_1_5_6_ub24.sh +RUN /build_zstd_1_5_6_ub24.sh + SHELL ["/bin/bash", "-ec"] # Make the directory if it doesn't exist yet. diff --git a/devops/dependencies-igc-dev.json b/devops/dependencies-igc-dev.json index 28cc58e1c7947..20d3c7c9aa08b 100644 --- a/devops/dependencies-igc-dev.json +++ b/devops/dependencies-igc-dev.json @@ -1,10 +1,10 @@ { "linux": { "igc_dev": { - "github_tag": "igc-dev-e0d826a", - "version": "e0d826a", - "updated_at": "2024-12-17T21:18:30Z", - "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2327583926/zip", + "github_tag": "igc-dev-97b3d8f", + "version": "97b3d8f", + "updated_at": "2025-01-08T17:43:30Z", + "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2403247641/zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" } } diff --git a/devops/dependencies.json b/devops/dependencies.json index f5976861016e0..79892387df4c1 100644 --- a/devops/dependencies.json +++ b/devops/dependencies.json @@ -1,15 +1,15 @@ { "linux": { "compute_runtime": { - "github_tag": "24.48.31907.7", - "version": "24.48.31907.7", - "url": "https://github.com/intel/compute-runtime/releases/tag/24.48.31907.7", + "github_tag": "24.52.32224.5", + "version": "24.52.32224.5", + "url": "https://github.com/intel/compute-runtime/releases/tag/24.52.32224.5", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "igc": { - "github_tag": "v2.2.3", - "version": "2.2.3", - "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.2.3", + "github_tag": "v2.5.6", + "version": "2.5.6", + "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.5.6", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "cm": { diff --git a/devops/scripts/build_zstd_1_5_6_ub24.sh b/devops/scripts/build_zstd_1_5_6_ub24.sh new file mode 100755 index 
0000000000000..68a947dfb43a0 --- /dev/null +++ b/devops/scripts/build_zstd_1_5_6_ub24.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +# Script to build and install zstd 1.5.6 on Ubuntu 24, with -fPIC flag. +# The default installation of zstd on Ubuntu 24 does not have -fPIC flag +# enabled, which is required for building DPC++ in shared libraries mode. + +# Function to check if the OS is Ubuntu 24 +check_os() { + . /etc/os-release + if [[ "$NAME" != "Ubuntu" || "$VERSION_ID" != "24.04" ]]; then + echo "Warning: This script has only been tested with Ubuntu 24." + fi +} + +# Function to install packages with or without sudo +install_packages() { + if [ "$USE_SUDO" = true ]; then + sudo apt-get update + sudo apt-get install -y build-essential wget + else + apt-get update + apt-get install -y build-essential wget + fi +} + +# Function to uninstall libzstd-dev if installed +uninstall_libzstd_dev() { + if dpkg -l | grep -q libzstd-dev; then + if [ "$USE_SUDO" = true ]; then + sudo apt-get remove -y libzstd-dev + else + apt-get remove -y libzstd-dev + fi + fi +} + +# Function to build a shared library by linking zstd static lib. +# This is used to verify that zstd is built correctly, with -fPIC flag. +build_test_program() { + cat <<EOF > test_zstd.c + #include <zstd.h> + int main() { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_freeCCtx(cctx); + return 0; + } +EOF + + # Try to use zstd's static library with -fPIC + gcc test_zstd.c -lzstd -fPIC -shared + if [ $? -ne 0 ]; then + echo "zstd installation verification failed." + else + echo "zstd installation verification passed." + fi + + # There won't be a.out file if verification failed.
+ rm test_zstd.c a.out || true +} + +# Check the OS +check_os + +# Set USE_SUDO to true or false based on your preference +USE_SUDO=true + +# Install necessary build tools +install_packages + +# Uninstall libzstd-dev package if installed +uninstall_libzstd_dev + +# Define the version and URL for zstd +ZSTD_VERSION="1.5.6" +ZSTD_URL="https://github.com/facebook/zstd/releases/download/v$ZSTD_VERSION/zstd-$ZSTD_VERSION.tar.gz" + +# Create a directory for the source code +mkdir -p zstd_build +cd zstd_build + +# Download and extract zstd source code +wget $ZSTD_URL +tar -xzf zstd-$ZSTD_VERSION.tar.gz +cd zstd-$ZSTD_VERSION + +# Build zstd with -fPIC flag. +CFLAGS="-fPIC" CXXFLAGS="-fPIC" make +if [ $? -ne 0 ]; then + echo "Error: make failed." + exit 1 +fi + +# Install zstd. +if [ "$USE_SUDO" = true ]; then + sudo make install +else + make install +fi +if [ $? -ne 0 ]; then + echo "Error: make install failed." + exit 1 +fi + +# Verify zstd installation. +build_test_program + +# Clean up (we are still inside zstd_build/zstd-$ZSTD_VERSION, so leave it first). +cd ../.. && rm -rf zstd_build diff --git a/devops/scripts/install_build_tools.sh b/devops/scripts/install_build_tools.sh index 37d9761751ebb..37e2c7e15ac4b 100755 --- a/devops/scripts/install_build_tools.sh +++ b/devops/scripts/install_build_tools.sh @@ -24,5 +24,5 @@ apt update && apt install -yqq \ jq \ curl \ libhwloc-dev \ - libzstd-dev - + libzstd-dev \ + time diff --git a/devops/scripts/install_drivers.sh b/devops/scripts/install_drivers.sh index 570f78091d9f2..01c2dde54d6d0 100755 --- a/devops/scripts/install_drivers.sh +++ b/devops/scripts/install_drivers.sh @@ -140,7 +140,7 @@ InstallIGFX () { get_release oneapi-src/level-zero $L0_TAG \ | grep ".*$UBUNTU_VER.*deb" \ | wget -qi - - dpkg -i --force-overwrite *.deb && rm *.deb *.sum + dpkg -i --force-all *.deb && rm *.deb *.sum mkdir -p /usr/local/lib/igc/ echo "$IGC_TAG" > /usr/local/lib/igc/IGCTAG.txt if [ "$IS_IGC_DEV" == "Yes" ]; then @@ -149,21 +149,21 @@ InstallIGFX () { # Backup and install it from release igc as a temporarily
workaround # while we working to resolve the issue. echo "Backup libopencl-clang" - cp -d /usr/local/lib/libopencl-clang.so.14* . + cp -d /usr/local/lib/libopencl-clang2.so.14* . echo "Download IGC dev git hash $IGC_DEV_VER" get_pre_release_igfx $IGC_DEV_URL $IGC_DEV_VER echo "Install IGC dev git hash $IGC_DEV_VER" # New dev IGC packaged iga64 conflicting with iga64 from intel-igc-media # force overwrite to workaround it first. - dpkg -i --force-overwrite *.deb + dpkg -i --force-all *.deb echo "Install libopencl-clang" # Workaround only, will download deb and install with dpkg once fixed. - cp -d libopencl-clang.so.14* /usr/local/lib/ + cp -d libopencl-clang2.so.14* /usr/local/lib/ rm /usr/local/lib/libigc.so /usr/local/lib/libigc.so.1* && \ ln -s /usr/local/lib/libigc.so.2 /usr/local/lib/libigc.so && \ ln -s /usr/local/lib/libigc.so.2 /usr/local/lib/libigc.so.1 echo "Clean up" - rm *.deb libopencl-clang.so.14* + rm *.deb libopencl-clang2.so.14* echo "$IGC_DEV_TAG" > /usr/local/lib/igc/IGCTAG.txt fi } diff --git a/devops/scripts/update_drivers.py b/devops/scripts/update_drivers.py index 4c3cbb791c851..e9b14f87d5572 100644 --- a/devops/scripts/update_drivers.py +++ b/devops/scripts/update_drivers.py @@ -48,16 +48,18 @@ def uplift_linux_igfx_driver(config, platform_tag, igc_dev_only): config[platform_tag]['compute_runtime']['version'] = compute_runtime['tag_name'] config[platform_tag]['compute_runtime']['url'] = 'https://github.com/intel/compute-runtime/releases/tag/' + compute_runtime['tag_name'] - for a in compute_runtime['assets']: - if a['name'].endswith('.sum'): - deps = str(urlopen(a['browser_download_url']).read()) - m = re.search(r"intel-igc-core_([0-9\.]*)_amd64", deps) - if m is not None: - ver = m.group(1) - config[platform_tag]['igc']['github_tag'] = 'igc-' + ver - config[platform_tag]['igc']['version'] = ver - config[platform_tag]['igc']['url'] = 'https://github.com/intel/intel-graphics-compiler/releases/tag/igc-' + ver - break + m = re.search( + 
re.escape("https://github.com/intel/intel-graphics-compiler/releases/tag/") + + r"(v[\.0-9]+)", + compute_runtime["body"], + ) + if m is not None: + ver = m.group(1) + config[platform_tag]["igc"]["github_tag"] = ver + config[platform_tag]["igc"]["version"] = ver + config[platform_tag]["igc"]["url"] = ( + "https://github.com/intel/intel-graphics-compiler/releases/tag/" + ver + ) cm = get_latest_release('intel/cm-compiler') config[platform_tag]['cm']['github_tag'] = cm['tag_name'] diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h index 4b9b76d33e393..6f4782725d514 100644 --- a/libclc/clc/include/clc/clcmacro.h +++ b/libclc/clc/include/clc/clcmacro.h @@ -247,6 +247,8 @@ } \ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half) +#pragma OPENCL EXTENSION cl_khr_fp16 : disable + #else #define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) diff --git a/libclc/clspv/lib/math/fma.cl b/libclc/clspv/lib/math/fma.cl index 556bd837a27a7..e6251db4e92db 100644 --- a/libclc/clspv/lib/math/fma.cl +++ b/libclc/clspv/lib/math/fma.cl @@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) { ((uint)st_fma.mantissa.lo & 0x7fffff)); } _CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float) + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) { + return (half)mad((float)a, (float)b, (float)c); +} +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half) + +#endif diff --git a/libclc/generic/gen_convert_common.py b/libclc/generic/gen_convert_common.py index d4399adda10d9..98cd83fc1a572 100644 --- a/libclc/generic/gen_convert_common.py +++ b/libclc/generic/gen_convert_common.py @@ -47,25 +47,31 @@ 'half' : 2, 'float' : 4, 'double': 8} -limit_max = {'char' : 'CHAR_MAX', - 'schar' : 'CHAR_MAX', - 'uchar' : 'UCHAR_MAX', - 'short' : 'SHRT_MAX', - 'ushort': 'USHRT_MAX', - 'int' : 'INT_MAX', - 'uint' : 
'UINT_MAX', - 'long' : 'LONG_MAX', - 'ulong' : 'ULONG_MAX'} +limit_max = { + "char": "CHAR_MAX", + "schar": "CHAR_MAX", + "uchar": "UCHAR_MAX", + "short": "SHRT_MAX", + "ushort": "USHRT_MAX", + "int": "INT_MAX", + "uint": "UINT_MAX", + "long": "LONG_MAX", + "ulong": "ULONG_MAX", + "half": "0x1.ffcp+15", +} -limit_min = {'char' : 'CHAR_MIN', - 'schar' : 'CHAR_MIN', - 'uchar' : '0', - 'short' : 'SHRT_MIN', - 'ushort': '0', - 'int' : 'INT_MIN', - 'uint' : '0', - 'long' : 'LONG_MIN', - 'ulong' : '0'} +limit_min = { + "char": "CHAR_MIN", + "schar": "CHAR_MIN", + "uchar": "0", + "short": "SHRT_MIN", + "ushort": "0", + "int": "INT_MIN", + "uint": "0", + "long": "LONG_MIN", + "ulong": "0", + "half": "-0x1.ffcp+15", +} def conditional_guard(src, dst): diff --git a/libclc/generic/include/clc/convert.h b/libclc/generic/include/clc/convert.h index eac4f4216ee43..687a685e70534 100644 --- a/libclc/generic/include/clc/convert.h +++ b/libclc/generic/include/clc/convert.h @@ -23,10 +23,19 @@ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) && defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) @@ -43,11 +52,20 @@ _CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) &&
defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) #endif diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py index a1220fd3c2664..38817a3fe4ed7 100644 --- a/libclc/generic/lib/gen_convert.py +++ b/libclc/generic/lib/gen_convert.py @@ -65,21 +65,21 @@ "uint", "long", "ulong", + "half", "float", "double", ] int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"] unsigned_types = ["uchar", "ushort", "uint", "ulong"] -float_types = ["float", "double"] +float_types = ["half", "float", "double"] int64_types = ["long", "ulong"] float64_types = ["double"] +float16_types = ["half"] vector_sizes = ["", "2", "3", "4", "8", "16"] half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")] saturation = ["", "_sat"] rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"] -float_prefix = {"float": "FLT_", "double": "DBL_"} -float_suffix = {"float": "f", "double": ""} bool_type = { "char": "char", @@ -90,6 +90,7 @@ "uint": "int", "long": "long", "ulong": "long", + "half": "short", "float": "int", "double": "long", } @@ -114,6 +115,7 @@ "uint": 4, "long": 8, "ulong": 8, + "half": 2, "float": 4, "double": 8, } @@ -127,6 +129,7 @@ "uint": "UINT_MAX", "long": "LONG_MAX", "ulong": "ULONG_MAX", + "half": "0x1.ffcp+15", } limit_min = { @@ -138,24 +141,33 @@ "uint": "0", "long": "LONG_MIN", "ulong": "0", + "half": "-0x1.ffcp+15", } def conditional_guard(src, dst): int64_count = 0 float64_count = 0 + float16_count = 0 if src in int64_types:
int64_count = int64_count + 1 elif src in float64_types: float64_count = float64_count + 1 + elif src in float16_types: + float16_count = float16_count + 1 if dst in int64_types: int64_count = int64_count + 1 elif dst in float64_types: float64_count = float64_count + 1 + elif dst in float16_types: + float16_count = float16_count + 1 if float64_count > 0: # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be print("#ifdef cl_khr_fp64") return True + elif float16_count > 0: + print("#if defined cl_khr_fp16") + return True elif int64_count > 0: print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)") return True @@ -198,6 +210,10 @@ def conditional_guard(src, dst): #pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl index e7ceaa14c3a38..d71d10024b180 100644 --- a/libclc/generic/lib/math/acos.cl +++ b/libclc/generic/lib/math/acos.cl @@ -171,3 +171,11 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acos) + +#endif diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl index e433b133ebb76..977c2e929b34c 100644 --- a/libclc/generic/lib/math/acosh.cl +++ b/libclc/generic/lib/math/acosh.cl @@ -125,3 +125,11 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh) + +#endif diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl index 753ee1cc3687f..5aa8a083df4e9 100644 --- 
a/libclc/generic/lib/math/acospi.cl +++ b/libclc/generic/lib/math/acospi.cl @@ -170,3 +170,11 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi) + +#endif diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl index 8fa118d77899c..13963b2d4d9ca 100644 --- a/libclc/generic/lib/math/asinh.cl +++ b/libclc/generic/lib/math/asinh.cl @@ -291,3 +291,11 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh) + +#endif diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl index a07019751a118..21b24a25a5210 100644 --- a/libclc/generic/lib/math/atan.cl +++ b/libclc/generic/lib/math/atan.cl @@ -181,3 +181,12 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double); #endif // cl_khr_fp64 + + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atan) + +#endif diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl index d8b209b27e696..fd57a492ed414 100644 --- a/libclc/generic/lib/math/atan2.cl +++ b/libclc/generic/lib/math/atan2.cl @@ -235,3 +235,11 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double); #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2) + +#endif diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl index a6b7a7eadbb97..19ab7346bb70d 100644 --- a/libclc/generic/lib/math/atan2pi.cl +++ b/libclc/generic/lib/math/atan2pi.cl @@ -219,3 
+219,11 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) { _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi) + +#endif diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl index de1c3bf5f2fca..10bad190cc0dc 100644 --- a/libclc/generic/lib/math/atanh.cl +++ b/libclc/generic/lib/math/atanh.cl @@ -111,3 +111,11 @@ _CLC_OVERLOAD _CLC_DEF double atanh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh) + +#endif diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl index 5df1e04e16492..8522acf349933 100644 --- a/libclc/generic/lib/math/atanpi.cl +++ b/libclc/generic/lib/math/atanpi.cl @@ -180,3 +180,11 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi) + +#endif diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl index f5a9068600c92..76ba2a7697121 100644 --- a/libclc/generic/lib/math/cbrt.cl +++ b/libclc/generic/lib/math/cbrt.cl @@ -149,3 +149,11 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt) + +#endif diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl index 4c76f23b9a4c8..eee9c9fcaa2d4 100644 --- a/libclc/generic/lib/math/clc_rootn.cl +++ b/libclc/generic/lib/math/clc_rootn.cl @@ -369,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) } 
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) { + return (half)__clc_rootn((float)x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc index 2005d1da66441..b701d78878c6a 100644 --- a/libclc/generic/lib/math/clc_sw_binary.inc +++ b/libclc/generic/lib/math/clc_sw_binary.inc @@ -2,11 +2,25 @@ #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) { return __CLC_SW_FUNC(__CLC_FUNC)(x, y); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return convert_half( + __CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)( + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x), + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(y))); +} +#endif #endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc index 842e7545b19b9..8767a2b134d09 100644 --- a/libclc/generic/lib/math/clc_sw_unary.inc +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -4,9 +4,19 @@ #define __CLC_SW_FUNC __CLC_XCONCAT(__clc_, __CLC_FUNC) #endif -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { return __CLC_SW_FUNC(x); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE 
__CLC_FUNC(__CLC_GENTYPE x) { + return convert_half(__CLC_SW_FUNC(convert_float(x))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)( + __CLC_SW_FUNC(__CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x))); +} +#endif #endif diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl index 792eb9ac3f1b5..5e5d43c0990fd 100644 --- a/libclc/generic/lib/math/cos.cl +++ b/libclc/generic/lib/math/cos.cl @@ -42,3 +42,11 @@ _CLC_OVERLOAD _CLC_DEF double cos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double); #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cos) + +#endif diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl index 6f932d6a8adbf..84c0505090ec2 100644 --- a/libclc/generic/lib/math/cosh.cl +++ b/libclc/generic/lib/math/cosh.cl @@ -190,3 +190,11 @@ _CLC_OVERLOAD _CLC_DEF double cosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh) + +#endif diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl index 5d1f6e238de49..9556cc04e4167 100644 --- a/libclc/generic/lib/math/cospi.cl +++ b/libclc/generic/lib/math/cospi.cl @@ -40,3 +40,11 @@ _CLC_OVERLOAD _CLC_DEF double cospi(double x) { } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double); #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi) + +#endif diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl index 1cc4c98de4c09..b36cb0d575d3a 100644 --- a/libclc/generic/lib/math/exp.cl +++ b/libclc/generic/lib/math/exp.cl @@ -43,3 +43,11 @@ _CLC_OVERLOAD _CLC_DEF double exp(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, 
double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(exp) + +#endif diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl index 0b8dbb6b66383..fc52c6f42484a 100644 --- a/libclc/generic/lib/math/expm1.cl +++ b/libclc/generic/lib/math/expm1.cl @@ -21,3 +21,11 @@ _CLC_OVERLOAD _CLC_DEF double expm1(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1) + +#endif diff --git a/libclc/generic/lib/math/fdim.inc b/libclc/generic/lib/math/fdim.inc index 9aa3496b18902..98cbef6076667 100644 --- a/libclc/generic/lib/math/fdim.inc +++ b/libclc/generic/lib/math/fdim.inc @@ -69,3 +69,28 @@ __CLC_FDIM_VEC(16) #undef __CLC_FDIM_VEC #endif #endif + +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +#define QNANBITPATT_FP16 ((short)0x7e00) +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, + private __CLC_GENTYPE y) { + short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16; + short r = -(x > y) & as_short(x - y); + return as_half((short)(n | r)); +} +#define __CLC_FDIM_VEC(width) \ + _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) { \ + /* See comment in float implementation for explanation. 
*/ \ + short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16; \ + short##width r = (x > y) & as_short##width(x - y); \ + return as_half##width(n | r); \ + } +__CLC_FDIM_VEC(2) +__CLC_FDIM_VEC(3) +__CLC_FDIM_VEC(4) +__CLC_FDIM_VEC(8) +__CLC_FDIM_VEC(16) +#undef __CLC_FDIM_VEC +#endif +#endif diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc index ace8829c6269b..0d938d23c26a1 100644 --- a/libclc/generic/lib/math/frexp.inc +++ b/libclc/generic/lib/math/frexp.inc @@ -42,6 +42,17 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE } #endif +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, + __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + return (__CLC_GENTYPE)frexp((float)x, ep); +} +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp, + __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN); +#endif +#endif + #if __CLC_FPSIZE == 64 #ifdef __CLC_SCALAR #define __CLC_AS_LONGN as_long diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl index 39b82cfdc22cc..f16b4404fbebe 100644 --- a/libclc/generic/lib/math/ilogb.cl +++ b/libclc/generic/lib/math/ilogb.cl @@ -71,3 +71,15 @@ _CLC_OVERLOAD _CLC_DEF int ilogb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF int ilogb(half x) { + return ilogb((float)x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half); + +#endif diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl index bf9aefc49c4e1..f0476230e63fe 100644 --- a/libclc/generic/lib/math/lgamma.cl +++ b/libclc/generic/lib/math/lgamma.cl @@ -41,4 +41,12 @@ _CLC_OVERLOAD _CLC_DEF double lgamma(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double) -#endif \ No newline at end of file +#endif + +#ifdef cl_khr_fp16 
+ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma) + +#endif diff --git a/libclc/generic/lib/math/lgamma_r.cl b/libclc/generic/lib/math/lgamma_r.cl index c459e8a7d097d..49439eb0a95f4 100644 --- a/libclc/generic/lib/math/lgamma_r.cl +++ b/libclc/generic/lib/math/lgamma_r.cl @@ -486,6 +486,17 @@ _CLC_OVERLOAD _CLC_DEF double lgamma_r(double x, private int *ip) { _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma_r, double, private, int) #endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half lgamma_r(half x, private int *iptr) { + return (half)lgamma_r((float)x, iptr); +} + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, lgamma_r, half, private, int); + +#endif #define __CLC_ADDRSPACE global #define __CLC_BODY diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc index 0e19ba8fb2c7c..8aa17fbe79bd8 100644 --- a/libclc/generic/lib/math/lgamma_r.inc +++ b/libclc/generic/lib/math/lgamma_r.inc @@ -21,12 +21,9 @@ * THE SOFTWARE. */ -// TODO: Enable half precision when the base version is implemented. 
-#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { __CLC_INTN private_iptr; __CLC_GENTYPE ret = lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; } -#endif diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl index 4c338edee1d33..d7d35c0910e9a 100644 --- a/libclc/generic/lib/math/log10.cl +++ b/libclc/generic/lib/math/log10.cl @@ -29,6 +29,10 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + _CLC_OVERLOAD _CLC_DEF float log10(float x) { return __spirv_ocl_log10(x); } @@ -42,3 +46,11 @@ _CLC_OVERLOAD _CLC_DEF double log10(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DEF half log10(half x) { + return __spirv_ocl_log10(x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl index d4b8eef74bfe6..67f029a14431e 100644 --- a/libclc/generic/lib/math/log1p.cl +++ b/libclc/generic/lib/math/log1p.cl @@ -175,3 +175,11 @@ _CLC_OVERLOAD _CLC_DEF double log1p(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p) + +#endif diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl index 1cd2ebcddabf0..b91a13529412f 100644 --- a/libclc/generic/lib/math/log2.cl +++ b/libclc/generic/lib/math/log2.cl @@ -29,6 +29,10 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + _CLC_OVERLOAD _CLC_DEF float log2(float x) { return __spirv_ocl_log2(x); } @@ -42,3 +46,11 @@ 
_CLC_OVERLOAD _CLC_DEF double log2(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DEF half log2(half x) { + return __spirv_ocl_log2(x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log2, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h index 4e20329f641bb..b8110ca1779a2 100644 --- a/libclc/generic/lib/math/log_base.h +++ b/libclc/generic/lib/math/log_base.h @@ -295,3 +295,22 @@ log(double x) } #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +_CLC_OVERLOAD _CLC_DEF half +#if defined(COMPILING_LOG2) +log2(half x) { + return (half)log2((float)x); +} +#elif defined(COMPILING_LOG10) +log10(half x) { + return (half)log10((float)x); +} +#else +log(half x) { + return (half)log((float)x); +} +#endif + +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl index e77088367dfbd..9683d68b52c3d 100644 --- a/libclc/generic/lib/math/logb.cl +++ b/libclc/generic/lib/math/logb.cl @@ -18,3 +18,11 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(logb) + +#endif diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc index 2add2c7459de9..84729d90a796f 100644 --- a/libclc/generic/lib/math/pown.inc +++ b/libclc/generic/lib/math/pown.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_pown(x, y); } -#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index 32bd41da37ddc..4c1133436b46d 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,9 +1,6 @@ 
-// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { private __CLC_INTN private_q; __CLC_GENTYPE ret = __clc_remquo(x, y, &private_q); *q = private_q; return ret; } -#endif diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc index f788649685ac9..3f5b00c082cd3 100644 --- a/libclc/generic/lib/math/rootn.inc +++ b/libclc/generic/lib/math/rootn.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_rootn(x, y); } -#endif diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl index 0ff24e0b21e1e..6a3299bda4073 100644 --- a/libclc/generic/lib/math/sin.cl +++ b/libclc/generic/lib/math/sin.cl @@ -42,3 +42,11 @@ _CLC_OVERLOAD _CLC_DEF double sin(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double); #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sin) + +#endif diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc index 177e74e605c51..b5a35c21f81f0 100644 --- a/libclc/generic/lib/math/sincos.inc +++ b/libclc/generic/lib/math/sincos.inc @@ -1,5 +1,3 @@ -// TODO: Enable half precision when sin/cos is implemented -#if __CLC_FPSIZE > 16 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ return __spirv_ocl_sincos(x, cosval); \ @@ -13,4 +11,3 @@ __CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) #endif #undef __CLC_DECLARE_SINCOS -#endif diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl index 742aa6124f6a4..48aa1ee756190 100644 --- a/libclc/generic/lib/math/sinh.cl +++ b/libclc/generic/lib/math/sinh.cl @@ -189,3 +189,11 @@ 
_CLC_OVERLOAD _CLC_DEF double sinh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh) + +#endif diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl index e26aaf3382c74..059f912507e66 100644 --- a/libclc/generic/lib/math/sinpi.cl +++ b/libclc/generic/lib/math/sinpi.cl @@ -43,3 +43,11 @@ _CLC_OVERLOAD _CLC_DEF double sinpi(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi) + +#endif diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl index 95a07fe5ac6b4..d9509c57b0507 100644 --- a/libclc/generic/lib/math/tanh.cl +++ b/libclc/generic/lib/math/tanh.cl @@ -144,3 +144,11 @@ _CLC_OVERLOAD _CLC_DEF double tanh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh) + +#endif diff --git a/libclc/libspirv/include/libspirv/conversion/GenericCastToPtrExplicit.h b/libclc/libspirv/include/libspirv/conversion/GenericCastToPtrExplicit.h new file mode 100644 index 0000000000000..2e5b954696543 --- /dev/null +++ b/libclc/libspirv/include/libspirv/conversion/GenericCastToPtrExplicit.h @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic void *, int); \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic const void *, int); \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic volatile void *, int); \ + _CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void * \ + __spirv_GenericCastToPtrExplicit_To##NAME(generic const volatile void *, \ + int) + +GenericCastToPtrExplicit_To(global, Global); +GenericCastToPtrExplicit_To(local, Local); +GenericCastToPtrExplicit_To(private, Private); + +#undef GenericCastToPtrExplicit_To diff --git a/libclc/libspirv/include/libspirv/spirv.h b/libclc/libspirv/include/libspirv/spirv.h index e926f7d8ff7a7..657ae6a220cf8 100644 --- a/libclc/libspirv/include/libspirv/spirv.h +++ b/libclc/libspirv/include/libspirv/spirv.h @@ -92,4 +92,7 @@ #include #include +/* Pointer Conversion */ +#include + #pragma OPENCL EXTENSION all : disable diff --git a/libclc/libspirv/include/libspirv/spirv_builtins.h b/libclc/libspirv/include/libspirv/spirv_builtins.h index 1b1aa983cc320..8a3f9070a1aee 100644 --- a/libclc/libspirv/include/libspirv/spirv_builtins.h +++ b/libclc/libspirv/include/libspirv/spirv_builtins.h @@ -16,6 +16,14 @@ #ifndef CLC_SPIRV_BINDING #define CLC_SPIRV_BINDING +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_bool_t __spirv_All(__clc_vec2_char_t); _CLC_OVERLOAD diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 08f39dd63c640..3665db09f6bd1 100644 --- 
a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -14,6 +14,7 @@ atomic/atomic_min.cl atomic/atomic_max.cl atomic/atomic_sub.cl atomic/atomic_store.cl +conversion/GenericCastToPtrExplicit.cl synchronization/barrier.cl math/acos.cl math/acosh.cl @@ -64,10 +65,8 @@ workitem/get_global_size.cl workitem/get_local_size.cl workitem/get_num_groups.cl workitem/get_max_sub_group_size.cl -workitem/get_num_sub_groups.cl workitem/get_sub_group_id.cl workitem/get_sub_group_local_id.cl -workitem/get_sub_group_size.cl misc/sub_group_shuffle.cl async/wait_group_events.cl assert/__assert_fail.ll diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/conversion/GenericCastToPtrExplicit.cl b/libclc/libspirv/lib/amdgcn-amdhsa/conversion/GenericCastToPtrExplicit.cl new file mode 100644 index 0000000000000..5072ffdfcf268 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/conversion/GenericCastToPtrExplicit.cl @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + + +_CLC_DEF static bool __clc_amdgcn_is_private(generic void *ptr) { + return __builtin_amdgcn_is_private(ptr); +} +_CLC_DEF static bool __clc_amdgcn_is_local(generic void *ptr) { + return __builtin_amdgcn_is_shared(ptr); +} +_CLC_DEF static bool __clc_amdgcn_is_global(generic void *ptr) { + return !__clc_amdgcn_is_private(ptr) && !__clc_amdgcn_is_local(ptr); +} + +#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic void *ptr, int unused) { \ + if (__clc_amdgcn_is_##ADDRSPACE(ptr)) \ + return (ADDRSPACE void *)ptr; \ + return 0; \ + } \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic const void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic volatile void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } \ + _CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void \ + *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic const volatile void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } + +GenericCastToPtrExplicit_To(global, Global) +GenericCastToPtrExplicit_To(local, Local) +GenericCastToPtrExplicit_To(private, Private) diff --git a/libclc/libspirv/lib/generic/SOURCES b/libclc/libspirv/lib/generic/SOURCES index 4f2455cc6b3ac..95e600cd17093 100644 --- a/libclc/libspirv/lib/generic/SOURCES +++ b/libclc/libspirv/lib/generic/SOURCES @@ -206,3 +206,5 @@ shared/vload.cl shared/vstore.cl workitem/get_global_id.cl workitem/get_global_size.cl 
+workitem/get_num_sub_groups.cl +workitem/get_sub_group_size.cl diff --git a/libclc/libspirv/lib/generic/gen_core_convert.py b/libclc/libspirv/lib/generic/gen_core_convert.py index 80c02489ce415..a5f094b7d6211 100755 --- a/libclc/libspirv/lib/generic/gen_core_convert.py +++ b/libclc/libspirv/lib/generic/gen_core_convert.py @@ -397,28 +397,66 @@ def generate_float_conversion(src, dst, size, mode, sat): print(" {SRC}{N} abs_x = __spirv_ocl_fabs(x);".format(SRC=src, N=size)) print(" {SRC}{N} abs_y = __spirv_ocl_fabs(y);".format(SRC=src, N=size)) print( - " return {BOOL_CONVERT}(abs_y > abs_x) ? r: __spirv_ocl_nextafter(r, __spirv_ocl_sign(r) * ({DST}{N})-INFINITY);".format( + " {DST}{N} sel = {BOOL_CONVERT}(abs_y > abs_x) ? r: __spirv_ocl_nextafter(r, __spirv_ocl_sign(r) * ({DST}{N})-INFINITY);".format( DST=dst, N=size, BOOL_CONVERT=clc_core_fn_name(bool_type[dst], size=size), ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to zero + # is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return __clc_clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max + ) + ) + else: + print(" return sel;") + if mode == "_rtp": print( - " return {BOOL_CONVERT}(y < x) ? r : __spirv_ocl_nextafter(r, ({DST}{N})INFINITY);".format( + " {DST}{N} sel = {BOOL_CONVERT}(y < x) ? r : __spirv_ocl_nextafter(r, ({DST}{N})INFINITY);".format( DST=dst, N=size, BOOL_CONVERT=clc_core_fn_name(bool_type[dst], size=size), ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + print( + " return __clc_max(sel, ({DST}{N}){DST_MIN});".format( + DST=dst, N=size, DST_MIN=limit_min[dst] + ) + ) + else: + print(" return sel;") + if mode == "_rtn": print( - " return {BOOL_CONVERT}(y > x) ? 
r : __spirv_ocl_nextafter(r, ({DST}{N})-INFINITY);".format( + " {DST}{N} sel = {BOOL_CONVERT}(y > x) ? r : __spirv_ocl_nextafter(r, ({DST}{N})-INFINITY);".format( DST=dst, N=size, BOOL_CONVERT=clc_core_fn_name(bool_type[dst], size=size), ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to + # negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff + # == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return __clc_min(sel, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MAX=dst_max + ) + ) + else: + print(" return sel;") + # Footer print("}") diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl similarity index 100% rename from libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl rename to libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl similarity index 100% rename from libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl rename to libclc/libspirv/lib/generic/workitem/get_sub_group_size.cl diff --git a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES index e43320ff45092..cf9a5a2c51152 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES +++ b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES @@ -3,6 +3,7 @@ atomic/loadstore_helpers_release.ll atomic/loadstore_helpers_acquire.ll atomic/loadstore_helpers_seq_cst.ll cl_khr_int64_extended_atomics/minmax_helpers.ll +conversion/GenericCastToPtrExplicit.cl integer/mul24.cl integer/mul_hi.cl math/acos.cl @@ -84,10 +85,8 @@ workitem/get_local_id.cl workitem/get_local_size.cl workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl -workitem/get_num_sub_groups.cl workitem/get_sub_group_id.cl 
workitem/get_sub_group_local_id.cl -workitem/get_sub_group_size.cl images/image_helpers.ll images/image.cl group/collectives_helpers.ll diff --git a/libclc/libspirv/lib/ptx-nvidiacl/conversion/GenericCastToPtrExplicit.cl b/libclc/libspirv/lib/ptx-nvidiacl/conversion/GenericCastToPtrExplicit.cl new file mode 100644 index 0000000000000..eefaa8009f1c3 --- /dev/null +++ b/libclc/libspirv/lib/ptx-nvidiacl/conversion/GenericCastToPtrExplicit.cl @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF static bool __clc_nvvm_is_private(generic void *ptr) { + return __nvvm_isspacep_local(ptr); +} +_CLC_DEF static bool __clc_nvvm_is_local(generic void *ptr) { + return __nvvm_isspacep_shared(ptr); +} +_CLC_DEF static bool __clc_nvvm_is_global(generic void *ptr) { + return __nvvm_isspacep_global(ptr); +} + +#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic void *ptr, int unused) { \ + if (__clc_nvvm_is_##ADDRSPACE(ptr)) \ + return (ADDRSPACE void *)ptr; \ + return 0; \ + } \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic const void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } \ + _CLC_DECL _CLC_OVERLOAD \ + ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic volatile void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } \ + _CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void \ + 
*__spirv_GenericCastToPtrExplicit_To##NAME( \ + generic const volatile void *ptr, int unused) { \ + return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \ + unused); \ + } + +GenericCastToPtrExplicit_To(global, Global) +GenericCastToPtrExplicit_To(local, Local) +GenericCastToPtrExplicit_To(private, Private) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl b/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl index bfd55b8b2d5da..0be6859af9e4a 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl @@ -46,6 +46,7 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_modf, double, #endif #ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable __CLC_MODF(__nv_modff, float, half) _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_modf, half, diff --git a/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl b/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl index 272a031b9ae35..05a7df8b751a6 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl @@ -47,6 +47,7 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sincos, #endif #ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable __CLC_SINCOS(__nv_sincosf, float, half) _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_sincos, half, diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_sub_groups.cl deleted file mode 100644 index 164b1ea66a921..0000000000000 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_sub_groups.cl +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -_CLC_DEF _CLC_OVERLOAD uint __spirv_NumSubgroups() { - // sreg.nwarpid returns number of warp identifiers, not number of warps - // see https://docs.nvidia.com/cuda/parallel-thread-execution/index.html - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); - uint sg_size = __spirv_SubgroupMaxSize(); - uint linear_size = size_z * size_y * size_x; - return (linear_size + sg_size - 1) / sg_size; -} diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_size.cl deleted file mode 100644 index b12145fe6707d..0000000000000 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_size.cl +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -_CLC_DEF _CLC_OVERLOAD uint __spirv_SubgroupSize() { - if (__spirv_SubgroupId() != __spirv_NumSubgroups() - 1) { - return __spirv_SubgroupMaxSize(); - } - size_t size_x = __spirv_WorkgroupSize_x(); - size_t size_y = __spirv_WorkgroupSize_y(); - size_t size_z = __spirv_WorkgroupSize_z(); - uint linear_size = size_z * size_y * size_x; - uint uniform_groups = __spirv_NumSubgroups() - 1; - uint uniform_size = __spirv_SubgroupMaxSize() * uniform_groups; - return linear_size - uniform_size; -} diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index 9a7bf4f758f84..2698067aa2637 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -369,6 +369,7 @@ const static char TranslateOCLMemScope[] = "__translate_ocl_memory_scope"; const static char TranslateSPIRVMemOrder[] = "__translate_spirv_memory_order"; const static char TranslateSPIRVMemScope[] = "__translate_spirv_memory_scope"; const static char TranslateSPIRVMemFence[] = "__translate_spirv_memory_fence"; +const static char EntrypointPrefix[] = "__spirv_entry_"; const static char ConvertHandleToImageINTEL[] = "ConvertHandleToImageINTEL"; const static char ConvertHandleToSamplerINTEL[] = "ConvertHandleToSamplerINTEL"; const static char ConvertHandleToSampledImageINTEL[] = diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 62764b0cb1c20..14b646a959389 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -3317,6 +3317,25 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { return Loc->second; auto IsKernel = isKernel(BF); + + if (IsKernel) { + // search for a previous function with the same name + // upgrade it to a kernel and drop this if it's found + for (auto &I : FuncMap) { + 
auto BFName = I.getFirst()->getName(); + if (BF->getName() == BFName) { + auto *F = I.getSecond(); + F->setCallingConv(CallingConv::SPIR_KERNEL); + F->setLinkage(GlobalValue::ExternalLinkage); + F->setDSOLocal(false); + F = cast(mapValue(BF, F)); + mapFunction(BF, F); + transFunctionAttrs(BF, F); + return F; + } + } + } + auto Linkage = IsKernel ? GlobalValue::ExternalLinkage : transLinkageType(BF); FunctionType *FT = cast(transType(BF->getFunctionType())); std::string FuncName = BF->getName(); @@ -3360,56 +3379,7 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL : CallingConv::SPIR_FUNC); - if (BF->hasDecorate(DecorationReferencedIndirectlyINTEL)) - F->addFnAttr("referenced-indirectly"); - if (isFuncNoUnwind()) - F->addFnAttr(Attribute::NoUnwind); - foreachFuncCtlMask(BF, [&](Attribute::AttrKind Attr) { F->addFnAttr(Attr); }); - - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I) { - auto BA = BF->getArgument(I->getArgNo()); - mapValue(BA, &(*I)); - setName(&(*I), BA); - AttributeMask IllegalAttrs = - AttributeFuncs::typeIncompatible(I->getType(), I->getAttributes()); - BA->foreachAttr([&](SPIRVFuncParamAttrKind Kind) { - // Skip this function parameter attribute as it will translated among - // OpenCL metadata - if (Kind == FunctionParameterAttributeRuntimeAlignedINTEL) - return; - Attribute::AttrKind LLVMKind = SPIRSPIRVFuncParamAttrMap::rmap(Kind); - if (IllegalAttrs.contains(LLVMKind)) - return; - Type *AttrTy = nullptr; - switch (LLVMKind) { - case Attribute::AttrKind::ByVal: - case Attribute::AttrKind::StructRet: - AttrTy = transType(BA->getType()->getPointerElementType()); - break; - default: - break; // do nothing - } - // Make sure to use a correct constructor for a typed/typeless attribute - auto A = AttrTy ? 
Attribute::get(*Context, LLVMKind, AttrTy) - : Attribute::get(*Context, LLVMKind); - I->addAttr(A); - }); - - AttrBuilder Builder(*Context); - SPIRVWord MaxOffset = 0; - if (BA->hasDecorate(DecorationMaxByteOffset, 0, &MaxOffset)) - Builder.addDereferenceableAttr(MaxOffset); - SPIRVWord AlignmentBytes = 0; - if (BA->hasDecorate(DecorationAlignment, 0, &AlignmentBytes)) - Builder.addAlignmentAttr(AlignmentBytes); - I->addAttrs(Builder); - } - BF->foreachReturnValueAttr([&](SPIRVFuncParamAttrKind Kind) { - if (Kind == FunctionParameterAttributeNoWrite) - return; - F->addRetAttr(SPIRSPIRVFuncParamAttrMap::rmap(Kind)); - }); + transFunctionAttrs(BF, F); // Creating all basic blocks before creating instructions. for (size_t I = 0, E = BF->getNumBasicBlock(); I != E; ++I) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp index 6da82f3e3e567..d204177559d49 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp @@ -39,6 +39,7 @@ #include "SPIRVRegularizeLLVM.h" #include "OCLUtil.h" #include "SPIRVInternal.h" +#include "SPIRVMDWalker.h" #include "libSPIRV/SPIRVDebug.h" #include "llvm/ADT/StringExtras.h" // llvm::isDigit @@ -433,65 +434,6 @@ bool SPIRVRegularizeLLVMBase::runRegularizeLLVM(Module &Module) { return true; } -// This is a temporary workaround to deal with a graphics driver failure not -// able to support the typed pointer reverse translation of -// getelementptr i8, ptr @__spirv_Builtin* patterns. This replaces such -// accesses with getelementptr i32, ptr @__spirv_Builtin instead. -static void simplifyBuiltinVarAccesses(GlobalValue *GV) { - // IGC only supports: - // load GV - // load (addrspacecast GV) - // load (gep (addrspacecast GV)) - // load (gep GV) - // Opaque pointers will cause the optimizer to use i8 geps, or to remove - // 0-index geps entirely (adding bitcasts to the result). Restore these to - // avoid bitcasts in the resulting IR. 
- Type *Ty = GV->getValueType(); - Type *ScalarTy = Ty->getScalarType(); - SmallVector Users; - for (auto User : GV->users()) { - if (auto *LI = dyn_cast(User)) { - if (LI->getType() != Ty) - Users.push_back(LI); - } else if (auto *GEP = dyn_cast(User)) { - if (GEP->getSourceElementType() != Ty) - Users.push_back(GEP); - } - } - - Type *Int32Ty = Type::getInt32Ty(GV->getContext()); - auto GetGep = [&](unsigned Offset, - std::optional InRange = std::nullopt) { - llvm::ConstantRange GepInRange(llvm::APInt(32, -((signed)Offset), true), - llvm::APInt(32, Offset, true)); - if (InRange) - GepInRange = *InRange; - return ConstantExpr::getGetElementPtr( - Ty, GV, - ArrayRef( - {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, Offset)}), - true, GepInRange); - }; - - const DataLayout &DL = GV->getParent()->getDataLayout(); - for (auto *User : Users) { - if (auto *LI = dyn_cast(User)) { - LI->setOperand(0, GetGep(0)); - } else if (auto *GEP = dyn_cast(User)) { - APInt Offset(64, 0); - GEP->accumulateConstantOffset(DL, Offset); - APInt Index; - uint64_t Remainder; - APInt::udivrem(Offset, ScalarTy->getScalarSizeInBits() / 8, Index, - Remainder); - assert(Remainder == 0 && "Cannot handle misaligned access to builtins"); - GEP->replaceAllUsesWith(GetGep(Index.getZExtValue(), GEP->getInRange())); - if (auto *Inst = dyn_cast(GEP)) - Inst->eraseFromParent(); - } - } -} - namespace { void regularizeWithOverflowInstrinsics(StringRef MangledName, CallInst *Call, Module *M, @@ -637,15 +579,10 @@ void prepareCacheControlsTranslation(Metadata *MD, Instruction *Inst) { /// Remove entities not representable by SPIR-V bool SPIRVRegularizeLLVMBase::regularize() { eraseUselessFunctions(M); + addKernelEntryPoint(M); expandSYCLTypeUsing(M); cleanupConversionToNonStdIntegers(M); - for (auto &GV : M->globals()) { - SPIRVBuiltinVariableKind Kind; - if (isSPIRVBuiltinVariable(&GV, &Kind)) - simplifyBuiltinVarAccesses(&GV); - } - // Kernels called by other kernels std::vector CalledKernels; 
for (auto I = M->begin(), E = M->end(); I != E;) { @@ -834,6 +771,69 @@ bool SPIRVRegularizeLLVMBase::regularize() { return true; } +void SPIRVRegularizeLLVMBase::addKernelEntryPoint(Module *M) { + std::vector Work; + + // Get a list of all functions that have SPIR kernel calling conv + for (auto &F : *M) { + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) + Work.push_back(&F); + } + for (auto &F : Work) { + // for declarations just make them into SPIR functions. + F->setCallingConv(CallingConv::SPIR_FUNC); + if (F->isDeclaration()) + continue; + + // Otherwise add a wrapper around the function to act as an entry point. + FunctionType *FType = F->getFunctionType(); + std::string WrapName = + kSPIRVName::EntrypointPrefix + static_cast(F->getName()); + Function *WrapFn = + getOrCreateFunction(M, F->getReturnType(), FType->params(), WrapName); + + auto *CallBB = BasicBlock::Create(M->getContext(), "", WrapFn); + IRBuilder<> Builder(CallBB); + + Function::arg_iterator DestI = WrapFn->arg_begin(); + for (const Argument &I : F->args()) { + DestI->setName(I.getName()); + DestI++; + } + SmallVector Args; + for (Argument &I : WrapFn->args()) { + Args.emplace_back(&I); + } + auto *CI = CallInst::Create(F, ArrayRef(Args), "", CallBB); + CI->setCallingConv(F->getCallingConv()); + CI->setAttributes(F->getAttributes()); + + // copy over all the metadata (should it be removed from F?) + SmallVector> MDs; + F->getAllMetadata(MDs); + WrapFn->setAttributes(F->getAttributes()); + for (auto MD = MDs.begin(), End = MDs.end(); MD != End; ++MD) { + WrapFn->addMetadata(MD->first, *MD->second); + } + WrapFn->setCallingConv(CallingConv::SPIR_KERNEL); + WrapFn->setLinkage(llvm::GlobalValue::InternalLinkage); + + Builder.CreateRet(F->getReturnType()->isVoidTy() ? nullptr : CI); + + // Have to find the spir-v metadata for execution mode and transfer it to + // the wrapper. 
+ if (auto NMD = SPIRVMDWalker(*M).getNamedMD(kSPIRVMD::ExecutionMode)) { + while (!NMD.atEnd()) { + Function *MDF = nullptr; + auto N = NMD.nextOp(); /* execution mode MDNode */ + N.get(MDF); + if (MDF == F) + N.M->replaceOperandWith(0, ValueAsMetadata::get(WrapFn)); + } + } + } +} + } // namespace SPIRV INITIALIZE_PASS(SPIRVRegularizeLLVMLegacy, "spvregular", diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.h b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.h index 823bd612423b8..c598708516182 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.h +++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.h @@ -51,6 +51,11 @@ class SPIRVRegularizeLLVMBase { // Lower functions bool regularize(); + // SPIR-V disallows functions being entrypoints and called + // LLVM doesn't. This adds a wrapper around the entry point + // that later SPIR-V writer renames. + void addKernelEntryPoint(Module *M); + /// Some LLVM intrinsics that have no SPIR-V counterpart may be wrapped in /// @spirv.llvm_intrinsic_* function. 
During reverse translation from SPIR-V /// to LLVM IR we can detect this @spirv.llvm_intrinsic_* function and diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index ea69fff7e8f06..91e7164b28a2f 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -906,13 +906,19 @@ SPIRVFunction *LLVMToSPIRVBase::transFunctionDecl(Function *F) { static_cast(mapValue(F, BM->addFunction(BFT))); BF->setFunctionControlMask(transFunctionControlMask(F)); if (F->hasName()) { - if (isUniformGroupOperation(F)) - BM->getErrorLog().checkError( - BM->isAllowedToUseExtension( - ExtensionID::SPV_KHR_uniform_group_instructions), - SPIRVEC_RequiresExtension, "SPV_KHR_uniform_group_instructions\n"); - - BM->setName(BF, F->getName().str()); + if (isKernel(F)) { + /* strip the prefix as the runtime will be looking for this name */ + std::string Prefix = kSPIRVName::EntrypointPrefix; + std::string Name = F->getName().str(); + BM->setName(BF, Name.substr(Prefix.size())); + } else { + if (isUniformGroupOperation(F)) + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + ExtensionID::SPV_KHR_uniform_group_instructions), + SPIRVEC_RequiresExtension, "SPV_KHR_uniform_group_instructions\n"); + BM->setName(BF, F->getName().str()); + } } if (!isKernel(F) && F->getLinkage() != GlobalValue::InternalLinkage) BF->setLinkageType(transLinkageType(F)); @@ -5911,7 +5917,7 @@ void LLVMToSPIRVBase::transFunction(Function *I) { if (isKernel(I)) { auto Interface = collectEntryPointInterfaces(BF, I); - BM->addEntryPoint(ExecutionModelKernel, BF->getId(), I->getName().str(), + BM->addEntryPoint(ExecutionModelKernel, BF->getId(), BF->getName(), Interface); } } @@ -6278,8 +6284,9 @@ bool LLVMToSPIRVBase::transMetadata() { // Work around to translate kernel_arg_type and kernel_arg_type_qual metadata static void transKernelArgTypeMD(SPIRVModule *BM, Function *F, MDNode *MD, std::string MDName) { - std::string KernelArgTypesMDStr = - 
std::string(MDName) + "." + F->getName().str() + "."; + std::string Prefix = kSPIRVName::EntrypointPrefix; + std::string Name = F->getName().str().substr(Prefix.size()); + std::string KernelArgTypesMDStr = std::string(MDName) + "." + Name + "."; for (const auto &TyOp : MD->operands()) KernelArgTypesMDStr += cast(TyOp)->getString().str() + ","; BM->getString(KernelArgTypesMDStr); diff --git a/llvm-spirv/test/entry_point_func.ll b/llvm-spirv/test/entry_point_func.ll new file mode 100644 index 0000000000000..4c8feebbaee30 --- /dev/null +++ b/llvm-spirv/test/entry_point_func.ll @@ -0,0 +1,68 @@ +;; Test to check that an LLVM spir_kernel gets translated into an +;; Entrypoint wrapper and Function with LinkageAttributes +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o - -spirv-text | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define spir_kernel void @testfunction() { + ret void +} + +define spir_kernel void @callerfunction() { + call spir_kernel void @testfunction() + call spir_kernel void @testdeclaration() + ret void +} + +declare spir_kernel void @testdeclaration() + +; Check there is an entrypoint and a function produced. 
+; CHECK-SPIRV: EntryPoint 6 [[#TestEn:]] "testfunction" +; CHECK-SPIRV: EntryPoint 6 [[#CallerEn:]] "callerfunction" +; CHECK-SPIRV: Name [[#TestDecl:]] "testdeclaration" +; CHECK-SPIRV: Name [[#TestFn:]] "testfunction" +; CHECK-SPIRV: Name [[#CallerFn:]] "callerfunction" +; CHECK-SPIRV: Decorate [[#TestDecl]] LinkageAttributes "testdeclaration" Import +; CHECK-SPIRV: Decorate [[#TestFn]] LinkageAttributes "testfunction" Export +; CHECK-SPIRV: Decorate [[#CallerFn]] LinkageAttributes "callerfunction" Export + +; CHECK-SPIRV: Function [[#]] [[#TestDecl]] [[#]] [[#]] +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: FunctionEnd + +; CHECK-SPIRV: Function [[#]] [[#TestFn]] [[#]] [[#]] +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: Label +; CHECK-SPIRV-NEXT: Return +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: FunctionEnd + +; CHECK-SPIRV: Function [[#]] [[#CallerFn]] [[#]] [[#]] +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: Label +; CHECK-SPIRV-NEXT: FunctionCall [[#]] [[#]] [[#TestFn]] +; CHECK-SPIRV-NEXT: FunctionCall [[#]] [[#]] [[#TestDecl]] +; CHECK-SPIRV-NEXT: Return +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: FunctionEnd + + +; CHECK-SPIRV: Function [[#]] [[#TestEn]] [[#]] [[#]] +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: Label +; CHECK-SPIRV-NEXT: FunctionCall [[#]] [[#]] [[#TestFn]] +; CHECK-SPIRV-NEXT: Return +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: FunctionEnd + +; CHECK-SPIRV: Function [[#]] [[#CallerEn]] [[#]] [[#]] +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: Label +; CHECK-SPIRV-NEXT: FunctionCall [[#]] [[#]] [[#CallerFn]] +; CHECK-SPIRV-NEXT: Return +; CHECK-SPIRV-EMPTY: +; CHECK-SPIRV-NEXT: FunctionEnd diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_cache_controls/multiple-decoration-single-arg.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_cache_controls/multiple-decoration-single-arg.ll index fcc7c718fa5db..fe1aef8f72958 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_cache_controls/multiple-decoration-single-arg.ll +++ 
b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_cache_controls/multiple-decoration-single-arg.ll @@ -3,8 +3,8 @@ ; RUN: llvm-spirv --spirv-ext=+SPV_INTEL_cache_controls %t.bc -o %t.spv ; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[#Func:]] "test" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[#FuncGEP:]] "test_gep" +; CHECK-SPIRV-DAG: Name [[#Func:]] "test" +; CHECK-SPIRV-DAG: Name [[#FuncGEP:]] "test_gep" ; CHECK-SPIRV-DAG: TypeInt [[#Int32:]] 32 0 ; CHECK-SPIRV-DAG: Constant [[#Int32]] [[#Zero:]] 0 ; CHECK-SPIRV-DAG: Decorate [[#GEP1:]] CacheControlLoadINTEL 1 1 diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_fpga_argument_interfaces/sycl-kernel-arg-annotation.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_fpga_argument_interfaces/sycl-kernel-arg-annotation.ll index 4001bd8ebff70..992862a3b3aa7 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_fpga_argument_interfaces/sycl-kernel-arg-annotation.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_fpga_argument_interfaces/sycl-kernel-arg-annotation.ll @@ -53,6 +53,7 @@ entry: ; CHECK-SPIRV: Capability FPGAArgumentInterfacesINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_fpga_argument_interfaces" ; CHECK-SPIRV: Extension "SPV_INTEL_fpga_buffer_location" +; CHECK-SPIRV-DAG: Name [[IDS:[0-9]+]] "_arg_p" ; CHECK-SPIRV-DAG: Name [[ID:[0-9]+]] "_arg_p" ; CHECK-SPIRV: Decorate [[ID]] Alignment 4 ; CHECK-SPIRV: Decorate [[ID]] MMHostInterfaceAddressWidthINTEL 32 diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/alias.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/alias.ll index 3dffc5d1f06ac..108b04ef58345 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/alias.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/alias.ll @@ -10,11 +10,11 @@ target triple = 
"spir64-unknown-unknown" ; when used since they can't be translated directly. ; CHECK-SPIRV-DAG: Name [[#FOO:]] "foo" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[#BAR:]] "bar" +; CHECK-SPIRV-DAG: Name [[#BAR:]] "bar" ; CHECK-SPIRV-DAG: Name [[#Y:]] "y" ; CHECK-SPIRV-DAG: Name [[#FOOPTR:]] "foo.alias" ; CHECK-SPIRV-DAG: Decorate [[#FOO]] LinkageAttributes "foo" Export -; INTEL-CHECK-SPIRV-DAG: Decorate [[#BAR]] LinkageAttributes "bar" Export +; CHECK-SPIRV-DAG: Decorate [[#BAR]] LinkageAttributes "bar" Export ; CHECK-SPIRV-DAG: TypeInt [[#I32:]] 32 0 ; CHECK-SPIRV-DAG: TypeInt [[#I64:]] 64 0 ; CHECK-SPIRV-DAG: TypeFunction [[#FOO_TYPE:]] [[#I32]] [[#I32]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/fp-from-host.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/fp-from-host.ll index 3a9a177d9b28b..aacdcc4fbc48c 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/fp-from-host.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/fp-from-host.ll @@ -17,7 +17,7 @@ ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" ; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[INT32_TYPE_ID:[0-9]+]] 32 ; CHECK-SPIRV: TypePointer [[INT_PTR:[0-9]+]] 5 [[INT32_TYPE_ID]] ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[INT32_TYPE_ID]] [[INT32_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer-as-function-arg.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer-as-function-arg.ll index cd9d717273f32..a933712f4d7ef 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer-as-function-arg.ll +++ 
b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer-as-function-arg.ll @@ -33,7 +33,7 @@ ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" ; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9]+]] 32 ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ; CHECK-SPIRV: TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer.ll index f4d63660f2921..bd2ceb32d4614 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/function-pointer.ll @@ -19,7 +19,7 @@ ; ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT_ID:[0-9]+]] ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT_ID]] [[TYPE_INT_ID]] ; CHECK-SPIRV: TypePointer [[FOO_PTR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/non-uniform-function-pointer.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/non-uniform-function-pointer.ll index 526f21279589c..f4e46456f964a 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/non-uniform-function-pointer.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/non-uniform-function-pointer.ll @@ -29,7 +29,7 @@ ; 
CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" ; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9+]]] 32 ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ; CHECK-SPIRV: TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/select.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/select.ll index 9e46deeaf754e..67eebd988ec77 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/select.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/CodeSectionINTEL/select.ll @@ -6,7 +6,7 @@ ; RUN: llvm-dis %t.r.bc -o %t.r.ll ; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM -; CHECK-SPIRV: EntryPoint [[#]] [[#KERNEL_ID:]] "_ZTS6kernel" +; CHECK-SPIRV: Name [[#KERNEL_ID:]] "_ZTS6kernel" ; CHECK-SPIRV-DAG: Name [[#BAR:]] "_Z3barii" ; CHECK-SPIRV-DAG: Name [[#BAZ:]] "_Z3bazii" ; CHECK-SPIRV: TypeInt [[#INT32:]] 32 diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/alias.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/alias.ll index e2e2f90aed11a..49344084a8f7a 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/alias.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/alias.ll @@ -1,5 +1,3 @@ -; XFAIL: * - ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-spirv -spirv-ext=+SPV_INTEL_function_pointers -spirv-text %t.bc -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -spirv-ext=+SPV_INTEL_function_pointers %t.bc -o %t.spv @@ -12,7 +10,7 @@ target triple = "spir64-unknown-unknown" ; when used since they can't be translated directly. 
; CHECK-SPIRV-DAG: Name [[#FOO:]] "foo" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[#BAR:]] "bar" +; CHECK-SPIRV-DAG: Name [[#BAR:]] "bar" ; CHECK-SPIRV-DAG: Name [[#Y:]] "y" ; CHECK-SPIRV-DAG: Name [[#FOOPTR:]] "foo.alias" ; CHECK-SPIRV-DAG: Decorate [[#FOO]] LinkageAttributes "foo" Export @@ -34,7 +32,7 @@ target triple = "spir64-unknown-unknown" ; CHECK-LLVM: define spir_func i32 @foo(i32 %x) -; CHECK-LLVM: define spir_func void @bar(ptr %y) +; CHECK-LLVM: define spir_kernel void @bar(ptr %y) ; CHECK-LLVM: [[PTRTOINT:%.*]] = ptrtoint ptr @foo to i64 ; CHECK-LLVM: store i64 [[PTRTOINT]], ptr %y, align 8 diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/fp-from-host.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/fp-from-host.ll index 805be68f89dad..bc1943f02bb41 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/fp-from-host.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/fp-from-host.ll @@ -17,7 +17,7 @@ ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" ; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[INT32_TYPE_ID:[0-9]+]] 32 ; CHECK-SPIRV: TypePointer [[INT_PTR:[0-9]+]] 5 [[INT32_TYPE_ID]] ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[INT32_TYPE_ID]] [[INT32_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll index 1aba54f8a78b3..d127083425edd 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll @@ -33,7 +33,7 @@ ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" 
; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9]+]] 32 ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ; CHECK-SPIRV: TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer.ll index e116745ae5d56..5ba4e8f74d0bd 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/function-pointer.ll @@ -19,7 +19,7 @@ ; ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT_ID:[0-9]+]] ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT_ID]] [[TYPE_INT_ID]] ; CHECK-SPIRV: TypePointer [[FOO_PTR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll index 1670f825f304a..4d744067c2a07 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll @@ -29,7 +29,7 @@ ; CHECK-SPIRV: Capability FunctionPointersINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_function_pointers" ; -; CHECK-SPIRV: EntryPoint [[#]] [[KERNEL_ID:[0-9]+]] "test" +; CHECK-SPIRV: Name [[KERNEL_ID:[0-9]+]] "test" ; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9+]]] 32 ; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ; CHECK-SPIRV: 
TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/select.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/select.ll index 3c4c9de5bb0a9..9ee7c0283f12f 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/select.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_function_pointers/select.ll @@ -6,7 +6,7 @@ ; RUN: llvm-dis %t.r.bc -o %t.r.ll ; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[#KERNEL_ID:]] "_ZTS6kernel" +; CHECK-SPIRV-DAG: Name [[#KERNEL_ID:]] "_ZTS6kernel" ; CHECK-SPIRV-DAG: Name [[#BAR:]] "_Z3barii" ; CHECK-SPIRV-DAG: Name [[#BAZ:]] "_Z3bazii" ; CHECK-SPIRV: TypeInt [[#INT32:]] 32 diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_unstructured_loop_controls/FPGAUnstructuredLoopAttr.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_unstructured_loop_controls/FPGAUnstructuredLoopAttr.ll index 0d292c7b48a7e..df154730de407 100644 --- a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_unstructured_loop_controls/FPGAUnstructuredLoopAttr.ll +++ b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_unstructured_loop_controls/FPGAUnstructuredLoopAttr.ll @@ -9,10 +9,10 @@ ; CHECK-SPIRV: Capability FPGALoopControlsINTEL ; CHECK-SPIRV: Extension "SPV_INTEL_fpga_loop_controls" ; CHECK-SPIRV: Extension "SPV_INTEL_unstructured_loop_controls" -; CHECK-SPIRV: EntryPoint [[#]] [[FOO:[0-9]+]] "foo" -; CHECK-SPIRV: EntryPoint [[#]] [[BOO:[0-9]+]] "boo" +; CHECK-SPIRV: Name [[FOO:[0-9]+]] "foo" ; CHECK-SPIRV: Name [[ENTRY_1:[0-9]+]] "entry" ; CHECK-SPIRV: Name [[FOR:[0-9]+]] "for.cond" +; CHECK-SPIRV: Name [[BOO:[0-9]+]] "boo" ; CHECK-SPIRV: Name [[ENTRY_2:[0-9]+]] "entry" ; CHECK-SPIRV: Name [[WHILE:[0-9]+]] "while.body" diff --git a/llvm-spirv/test/mem2reg.cl b/llvm-spirv/test/mem2reg.cl index e5d4ad2507daa..ef1dddbf3ed21 100644 --- a/llvm-spirv/test/mem2reg.cl +++ b/llvm-spirv/test/mem2reg.cl @@ -1,10 +1,11 @@ 
// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -cl-std=CL2.0 -x cl -disable-O0-optnone %s -emit-llvm-bc -o %t.bc // RUN: llvm-spirv -s %t.bc -// RUN: llvm-dis < %t.bc | FileCheck %s --check-prefixes=CHECK,CHECK-WO +// RUN: llvm-dis < %t.bc | FileCheck %s --check-prefixes=CHECK-WO // RUN: llvm-spirv -s -spirv-mem2reg %t.bc -o %t.opt.bc -// RUN: llvm-dis < %t.opt.bc | FileCheck %s --check-prefixes=CHECK,CHECK-W -// CHECK-LABEL: spir_kernel void @foo +// RUN: llvm-dis < %t.opt.bc | FileCheck %s --check-prefixes=CHECK-W +// CHECK-W-LABEL: spir_func void @foo // CHECK-W-NOT: alloca +// CHECK-WO-LABEL: spir_kernel void @foo // CHECK-WO: alloca __kernel void foo(__global int *a) { *a = *a + 1; diff --git a/llvm-spirv/test/transcoding/OpenCL/atomic_cmpxchg.cl b/llvm-spirv/test/transcoding/OpenCL/atomic_cmpxchg.cl index 18dd57ae39694..aeffc836748c2 100644 --- a/llvm-spirv/test/transcoding/OpenCL/atomic_cmpxchg.cl +++ b/llvm-spirv/test/transcoding/OpenCL/atomic_cmpxchg.cl @@ -22,7 +22,7 @@ __kernel void test_atomic_cmpxchg(__global int *p, int cmp, int val) { atomic_cmpxchg(up, ucmp, uval); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST:[0-9]+]] "test_atomic_cmpxchg" +// CHECK-SPIRV: Name [[TEST:[0-9]+]] "test_atomic_cmpxchg" // CHECK-SPIRV-DAG: TypeInt [[UINT:[0-9]+]] 32 0 // CHECK-SPIRV-TYPED-PTRS-DAG: TypePointer [[UINT_PTR:[0-9]+]] 5 [[UINT]] // CHECK-SPIRV-UNTYPED-PTRS-DAG: TypeUntypedPointerKHR [[UINT_PTR:[0-9]+]] 5 diff --git a/llvm-spirv/test/transcoding/OpenCL/atomic_legacy.cl b/llvm-spirv/test/transcoding/OpenCL/atomic_legacy.cl index 93e4b5db12997..31a2cc3ef387c 100644 --- a/llvm-spirv/test/transcoding/OpenCL/atomic_legacy.cl +++ b/llvm-spirv/test/transcoding/OpenCL/atomic_legacy.cl @@ -18,7 +18,7 @@ __kernel void test_legacy_atomics(__global int *p, int val) { atomic_add(p, val); // from OpenCL C 1.1 } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST:[0-9]+]] "test_legacy_atomics" +// CHECK-SPIRV: Name [[TEST:[0-9]+]] "test_legacy_atomics" // CHECK-SPIRV-DAG: TypeInt 
[[UINT:[0-9]+]] 32 0 // CHECK-SPIRV-TYPED-PTRS-DAG: TypePointer [[UINT_PTR:[0-9]+]] 5 [[UINT]] // CHECK-SPIRV-UNTYPED-PTRS-DAG: TypeUntypedPointerKHR [[UINT_PTR:[0-9]+]] 5 diff --git a/llvm-spirv/test/transcoding/OpenCL/atomic_work_item_fence.cl b/llvm-spirv/test/transcoding/OpenCL/atomic_work_item_fence.cl index fd1031c574315..6cc9c14339bdb 100644 --- a/llvm-spirv/test/transcoding/OpenCL/atomic_work_item_fence.cl +++ b/llvm-spirv/test/transcoding/OpenCL/atomic_work_item_fence.cl @@ -23,7 +23,7 @@ __kernel void test_mem_fence_non_const_flags(cl_mem_fence_flags flags, memory_or // atomic_work_item_fence(flags, order, scope); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST_CONST_FLAGS:[0-9]+]] "test_mem_fence_const_flags" +// CHECK-SPIRV: Name [[TEST_CONST_FLAGS:[0-9]+]] "test_mem_fence_const_flags" // CHECK-SPIRV: TypeInt [[UINT:[0-9]+]] 32 0 // // 0x0 Relaxed + 0x100 WorkgroupMemory diff --git a/llvm-spirv/test/transcoding/OpenCL/barrier.cl b/llvm-spirv/test/transcoding/OpenCL/barrier.cl index 3cb0040aa5151..c2ee95340c137 100644 --- a/llvm-spirv/test/transcoding/OpenCL/barrier.cl +++ b/llvm-spirv/test/transcoding/OpenCL/barrier.cl @@ -28,7 +28,7 @@ __kernel void test_barrier_non_const_flags(cl_mem_fence_flags flags) { // barrier(flags); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" +// CHECK-SPIRV: Name [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" // CHECK-SPIRV: TypeInt [[UINT:[0-9]+]] 32 0 // // In SPIR-V, barrier is represented as OpControlBarrier [3] and OpenCL diff --git a/llvm-spirv/test/transcoding/OpenCL/mem_fence.cl b/llvm-spirv/test/transcoding/OpenCL/mem_fence.cl index 77945c08e00fa..4c12695904449 100644 --- a/llvm-spirv/test/transcoding/OpenCL/mem_fence.cl +++ b/llvm-spirv/test/transcoding/OpenCL/mem_fence.cl @@ -34,7 +34,7 @@ __kernel void test_mem_fence_non_const_flags(cl_mem_fence_flags flags) { // mem_fence(flags); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST_CONST_FLAGS:[0-9]+]] 
"test_mem_fence_const_flags" +// CHECK-SPIRV: Name [[TEST_CONST_FLAGS:[0-9]+]] "test_mem_fence_const_flags" // CHECK-SPIRV: TypeInt [[UINT:[0-9]+]] 32 0 // // In SPIR-V, mem_fence is represented as OpMemoryBarrier [2] and OpenCL diff --git a/llvm-spirv/test/transcoding/OpenCL/sub_group_barrier.cl b/llvm-spirv/test/transcoding/OpenCL/sub_group_barrier.cl index d56db83b6d4ae..173991abd868e 100644 --- a/llvm-spirv/test/transcoding/OpenCL/sub_group_barrier.cl +++ b/llvm-spirv/test/transcoding/OpenCL/sub_group_barrier.cl @@ -31,7 +31,7 @@ __kernel void test_barrier_non_const_flags(cl_mem_fence_flags flags, memory_scop // sub_group_barrier(flags, scope); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" +// CHECK-SPIRV: Name [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" // CHECK-SPIRV: TypeInt [[UINT:[0-9]+]] 32 0 // // In SPIR-V, barrier is represented as OpControlBarrier [2] and OpenCL diff --git a/llvm-spirv/test/transcoding/OpenCL/work_group_barrier.cl b/llvm-spirv/test/transcoding/OpenCL/work_group_barrier.cl index ac331a997b782..ec6c087f035a5 100644 --- a/llvm-spirv/test/transcoding/OpenCL/work_group_barrier.cl +++ b/llvm-spirv/test/transcoding/OpenCL/work_group_barrier.cl @@ -33,7 +33,7 @@ __kernel void test_barrier_non_const_flags(cl_mem_fence_flags flags, memory_scop // work_group_barrier(flags, scope); } -// CHECK-SPIRV: EntryPoint [[#]] [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" +// CHECK-SPIRV: Name [[TEST_CONST_FLAGS:[0-9]+]] "test_barrier_const_flags" // CHECK-SPIRV: TypeInt [[UINT:[0-9]+]] 32 0 // // In SPIR-V, barrier is represented as OpControlBarrier [2] and OpenCL diff --git a/llvm-spirv/test/transcoding/SampledImage.cl b/llvm-spirv/test/transcoding/SampledImage.cl index 22270fa89df5c..c37de46333bc3 100644 --- a/llvm-spirv/test/transcoding/SampledImage.cl +++ b/llvm-spirv/test/transcoding/SampledImage.cl @@ -27,8 +27,8 @@ void sample_kernel_int(image2d_t input, float2 coords, global int4 
*results, sam } // CHECK-SPIRV: Capability LiteralSampler -// CHECK-SPIRV: EntryPoint [[#]] [[sample_kernel_float:[0-9]+]] "sample_kernel_float" -// CHECK-SPIRV: EntryPoint [[#]] [[sample_kernel_int:[0-9]+]] "sample_kernel_int" +// CHECK-SPIRV: Name [[sample_kernel_float:[0-9]+]] "sample_kernel_float" +// CHECK-SPIRV: Name [[sample_kernel_int:[0-9]+]] "sample_kernel_int" // CHECK-SPIRV: TypeSampler [[TypeSampler:[0-9]+]] // CHECK-SPIRV: TypeSampledImage [[SampledImageTy:[0-9]+]] diff --git a/llvm-spirv/test/transcoding/enqueue_kernel.cl b/llvm-spirv/test/transcoding/enqueue_kernel.cl index 54635a4a04a38..8e94f18ac890c 100644 --- a/llvm-spirv/test/transcoding/enqueue_kernel.cl +++ b/llvm-spirv/test/transcoding/enqueue_kernel.cl @@ -17,11 +17,11 @@ // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer5:[0-9]+]] "__device_side_enqueue_block_invoke_5_kernel" // CHECK-SPIRV: Name [[BlockGlb1:[0-9]+]] "__block_literal_global" // CHECK-SPIRV: Name [[BlockGlb2:[0-9]+]] "__block_literal_global.1" -// CHECK-SPIRV: Name [[#InvokeFunc1:]] "__device_side_enqueue_block_invoke" -// CHECK-SPIRV: Name [[#InvokeFunc2:]] "__device_side_enqueue_block_invoke_2" -// CHECK-SPIRV: Name [[#InvokeFunc3:]] "__device_side_enqueue_block_invoke_3" -// CHECK-SPIRV: Name [[#InvokeFunc4:]] "__device_side_enqueue_block_invoke_4" -// CHECK-SPIRV: Name [[#InvokeFunc5:]] "__device_side_enqueue_block_invoke_5" +// CHECK-SPIRV: Name [[#InvokeFunc1:]] "__device_side_enqueue_block_invoke_kernel" +// CHECK-SPIRV: Name [[#InvokeFunc2:]] "__device_side_enqueue_block_invoke_2_kernel" +// CHECK-SPIRV: Name [[#InvokeFunc3:]] "__device_side_enqueue_block_invoke_3_kernel" +// CHECK-SPIRV: Name [[#InvokeFunc4:]] "__device_side_enqueue_block_invoke_4_kernel" +// CHECK-SPIRV: Name [[#InvokeFunc5:]] "__device_side_enqueue_block_invoke_5_kernel" // CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 // CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 @@ -65,7 +65,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, 
char c0) { // CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] [[#]] [[#]] [[#]] [[#]] // CHECK-SPIRV-SAME: [[ConstInt0]] [[EventNull]] [[#]] - // CHECK-SPIRV-SAME: [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] + // CHECK-SPIRV-SAME: [[#InvokeFunc1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] // CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast ptr %block to ptr addrspace(4) // CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = bitcast ptr addrspace(4) [[Block2]] to ptr addrspace(4) @@ -85,7 +85,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] [[#]] [[#]] [[#]] [[#]] // CHECK-SPIRV-SAME: [[ConstInt2]] [[Event1]] [[Event2]] - // CHECK-SPIRV-SAME: [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] + // CHECK-SPIRV-SAME: [[#InvokeFunc2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] // CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast ptr %block4 to ptr addrspace(4) // CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = bitcast ptr addrspace(4) [[Block3]] to ptr addrspace(4) @@ -106,7 +106,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit3:[0-9]+]] [[BlockLit3Tmp]] // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] [[#]] [[#]] [[#]] [[#]] // CHECK-SPIRV-SAME: [[ConstInt2]] [[Event1]] [[Event2]] - // CHECK-SPIRV-SAME: [[BlockKer3]] [[BlockLit3]] [[ConstInt12]] [[ConstInt8]] + // CHECK-SPIRV-SAME: [[#InvokeFunc3]] [[BlockLit3]] [[ConstInt12]] [[ConstInt8]] // CHECK-SPIRV-SAME: [[LocalBuf31]] // CHECK-LLVM: [[Block0Tmp:%[0-9]+]] = addrspacecast ptr addrspace(1) @__block_literal_global to ptr addrspace(4) @@ -129,7 +129,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit4:[0-9]+]] [[BlockLit4Tmp]] // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] [[#]] 
[[#]] [[#]] [[#]] // CHECK-SPIRV-SAME: [[ConstInt0]] [[#]] [[#]] - // CHECK-SPIRV-SAME: [[BlockKer4]] [[BlockLit4]] [[ConstInt12]] [[ConstInt8]] + // CHECK-SPIRV-SAME: [[#InvokeFunc4]] [[BlockLit4]] [[ConstInt12]] [[ConstInt8]] // CHECK-SPIRV-SAME: [[LocalBuf41]] [[LocalBuf42]] [[LocalBuf43]] // CHECK-LLVM: [[Block1Tmp:%[0-9]+]] = addrspacecast ptr addrspace(1) @__block_literal_global.1 to ptr addrspace(4) @@ -150,7 +150,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit5:[0-9]+]] // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] [[#]] [[#]] [[#]] [[#]] // CHECK-SPIRV-SAME: [[ConstInt0]] [[#]] [[Event1]] - // CHECK-SPIRV-SAME: [[BlockKer5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]] + // CHECK-SPIRV-SAME: [[#InvokeFunc5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]] // CHECK-LLVM: [[Block5:%[0-9]+]] = addrspacecast ptr %block15 to ptr addrspace(4) // CHECK-LLVM: [[Block5Ptr:%[0-9]+]] = bitcast ptr addrspace(4) [[Block5]] to ptr addrspace(4) diff --git a/llvm-spirv/test/transcoding/kernel_arg_type_qual.ll b/llvm-spirv/test/transcoding/kernel_arg_type_qual.ll index 0a2d486c4263f..0824b49e559d8 100644 --- a/llvm-spirv/test/transcoding/kernel_arg_type_qual.ll +++ b/llvm-spirv/test/transcoding/kernel_arg_type_qual.ll @@ -14,7 +14,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 target triple = "spir64-unknown-unknown." 
; CHECK-SPIRV: String [[#]] "kernel_arg_type_qual.test.volatile,const,," -; CHECK-SPIRV: Name [[ARG:[0-9]+]] "g" +; CHECK-SPIRV: Name [[ARG:1[0-9]+]] "g" ; CHECK-SPIRV: Decorate [[ARG]] Volatile ; CHECK-SPIRV-NEGATIVE-NOT: String [[#]] "kernel_arg_type_qual.test.volatile,const,," diff --git a/llvm-spirv/test/transcoding/kernel_query.ll b/llvm-spirv/test/transcoding/kernel_query.ll index a5d3c88207f09..78015194f30d3 100644 --- a/llvm-spirv/test/transcoding/kernel_query.ll +++ b/llvm-spirv/test/transcoding/kernel_query.ll @@ -40,10 +40,10 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV-DAG: Name [[BlockGlb2:[0-9]+]] "__block_literal_global.1" ; CHECK-SPIRV-DAG: Name [[BlockGlb3:[0-9]+]] "__block_literal_global.2" ; CHECK-SPIRV-DAG: Name [[BlockGlb4:[0-9]+]] "__block_literal_global.3" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[BlockKer3:[0-9]+]] "__device_side_enqueue_block_invoke_3_kernel" -; CHECK-SPIRV-DAG: EntryPoint [[#]] [[BlockKer4:[0-9]+]] "__device_side_enqueue_block_invoke_4_kernel" +; CHECK-SPIRV-DAG: Name [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" +; CHECK-SPIRV-DAG: Name [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" +; CHECK-SPIRV-DAG: Name [[BlockKer3:[0-9]+]] "__device_side_enqueue_block_invoke_3_kernel" +; CHECK-SPIRV-DAG: Name [[BlockKer4:[0-9]+]] "__device_side_enqueue_block_invoke_4_kernel" ; CHECK-LLVM: [[BlockTy:%[0-9a-z\.]+]] = type { i32, i32 } %1 = type <{ i32, i32 }> diff --git a/llvm-spirv/test/transcoding/registerallocmode.ll b/llvm-spirv/test/transcoding/registerallocmode.ll index f7b567e84502a..8c59d6be69e1e 100644 --- a/llvm-spirv/test/transcoding/registerallocmode.ll +++ b/llvm-spirv/test/transcoding/registerallocmode.ll @@ -4,11 +4,11 @@ ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv 
-o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-SPIRV: EntryPoint [[#]] [[#FUNC0:]] "main_l3" -; CHECK-SPIRV: EntryPoint [[#]] [[#FUNC1:]] "main_l6" -; CHECK-SPIRV: EntryPoint [[#]] [[#FUNC2:]] "main_l9" -; CHECK-SPIRV: EntryPoint [[#]] [[#FUNC3:]] "main_l13" -; CHECK-SPIRV: EntryPoint [[#]] [[#FUNC4:]] "main_l19" +; CHECK-SPIRV: Name [[#FUNC0:]] "main_l3" +; CHECK-SPIRV: Name [[#FUNC1:]] "main_l6" +; CHECK-SPIRV: Name [[#FUNC2:]] "main_l9" +; CHECK-SPIRV: Name [[#FUNC3:]] "main_l13" +; CHECK-SPIRV: Name [[#FUNC4:]] "main_l19" ; CHECK-SPIRV: Decorate [[#FUNC0]] UserSemantic "num-thread-per-eu 4" ; CHECK-SPIRV: Decorate [[#FUNC1]] UserSemantic "num-thread-per-eu 8" @@ -19,8 +19,11 @@ ; CHECK-LLVM: @[[FLAG0:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 4\00", section "llvm.metadata" ; CHECK-LLVM: @[[FLAG1:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 8\00", section "llvm.metadata" ; CHECK-LLVM: @[[FLAG2:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 0\00", section "llvm.metadata" +; CHECK-LLVM: @[[FLAG3:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 4\00", section "llvm.metadata" +; CHECK-LLVM: @[[FLAG4:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 8\00", section "llvm.metadata" +; CHECK-LLVM: @[[FLAG5:[0-9]+]] = private unnamed_addr constant [20 x i8] c"num-thread-per-eu 0\00", section "llvm.metadata" -; CHECK-LLVM: @llvm.global.annotations = appending global [3 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @main_l3, ptr @[[FLAG0]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l6, ptr @[[FLAG1]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l9, ptr @[[FLAG2]], ptr undef, i32 undef, ptr undef }], section "llvm.metadata" +; CHECK-LLVM: @llvm.global.annotations = appending global [6 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr 
@main_l3, ptr @[[FLAG0]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l6, ptr @[[FLAG1]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l9, ptr @[[FLAG2]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l3, ptr @[[FLAG3]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l6, ptr @[[FLAG4]], ptr undef, i32 undef, ptr undef }, { ptr, ptr, ptr, i32, ptr } { ptr @main_l9, ptr @[[FLAG5]], ptr undef, i32 undef, ptr undef }], section "llvm.metadata" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64" diff --git a/llvm/lib/SYCLLowerIR/SYCLConditionalCallOnDevice.cpp b/llvm/lib/SYCLLowerIR/SYCLConditionalCallOnDevice.cpp index b226437709b93..5620b068a062e 100644 --- a/llvm/lib/SYCLLowerIR/SYCLConditionalCallOnDevice.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLConditionalCallOnDevice.cpp @@ -53,7 +53,7 @@ SYCLConditionalCallOnDevicePass::run(Module &M, ModuleAnalysisManager &) { // (FAction). FAction should be a literal (i.e. not a pointer). The // structure of the header file ensures that there is exactly one such // instruction. 
- bool CallFound = false; + [[maybe_unused]] bool CallFound = false; for (Instruction &I : instructions(FCaller)) { if (auto *CI = dyn_cast(&I); CI && (Intrinsic::IndependentIntrinsics::not_intrinsic == @@ -121,8 +121,8 @@ SYCLConditionalCallOnDevicePass::run(Module &M, ModuleAnalysisManager &) { Args.push_back(Call->getArgOperand(I)); // Create the new call instruction - auto *NewCall = - CallInst::Create(NewFCaller, Args, /* NameStr = */ "", Call); + auto *NewCall = CallInst::Create(NewFCaller, Args, /* NameStr = */ "", + Call->getIterator()); NewCall->setCallingConv(Call->getCallingConv()); NewCall->setDebugLoc(Call->getDebugLoc()); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3d5fede606f9f..2386fc83fa3c9 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -767,50 +767,110 @@ Constant *getOrCreateGlobalString(Module &M, StringRef Name, StringRef Value, }); } -static void extendSpirKernelArgs(Module &M) { - SmallVector SpirKernelsMetadata; +static bool isUnsupportedDeviceGlobal(const GlobalVariable *G) { + // Skip instrumenting on "__MsanKernelMetadata" etc. 
+ if (G->getName().starts_with("__Msan")) + return true; + if (G->getName().starts_with("__spirv_BuiltIn")) + return true; + if (G->getName().starts_with("__usid_str")) + return true; + if (G->getAddressSpace() == kSpirOffloadLocalAS || + G->getAddressSpace() == kSpirOffloadConstantAS) + return true; + return false; +} + +static void instrumentSPIRModule(Module &M) { const auto &DL = M.getDataLayout(); Type *IntptrTy = DL.getIntPtrType(M.getContext()); - // SpirKernelsMetadata only saves fixed kernels, and is described by - // following structure: - // uptr unmangled_kernel_name - // uptr unmangled_kernel_name_size - StructType *StructTy = StructType::get(IntptrTy, IntptrTy); - for (Function &F : M) { - if (F.getCallingConv() != CallingConv::SPIR_KERNEL) - continue; + // Instrument __MsanKernelMetadata, which records information of sanitized + // kernel + { + SmallVector SpirKernelsMetadata; + + // SpirKernelsMetadata only saves fixed kernels, and is described by + // following structure: + // uptr unmangled_kernel_name + // uptr unmangled_kernel_name_size + StructType *StructTy = StructType::get(IntptrTy, IntptrTy); + for (Function &F : M) { + if (F.getCallingConv() != CallingConv::SPIR_KERNEL) + continue; - if (!F.hasFnAttribute(Attribute::SanitizeMemory) || - F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) - continue; + if (!F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) + continue; - auto KernelName = F.getName(); - auto *KernelNameGV = getOrCreateGlobalString(M, "__msan_kernel", KernelName, - kSpirOffloadConstantAS); - SpirKernelsMetadata.emplace_back(ConstantStruct::get( - StructTy, ConstantExpr::getPointerCast(KernelNameGV, IntptrTy), - ConstantInt::get(IntptrTy, KernelName.size()))); - } - - // Create global variable to record spirv kernels' information - ArrayType *ArrayTy = ArrayType::get(StructTy, SpirKernelsMetadata.size()); - Constant *MetadataInitializer = - 
ConstantArray::get(ArrayTy, SpirKernelsMetadata); - GlobalVariable *MsanSpirKernelMetadata = new GlobalVariable( - M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage, - MetadataInitializer, "__MsanKernelMetadata", nullptr, - GlobalValue::NotThreadLocal, 1); - MsanSpirKernelMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); - // Add device global attributes - MsanSpirKernelMetadata->addAttribute( - "sycl-device-global-size", std::to_string(DL.getTypeAllocSize(ArrayTy))); - MsanSpirKernelMetadata->addAttribute("sycl-device-image-scope"); - MsanSpirKernelMetadata->addAttribute("sycl-host-access", "0"); // read only - MsanSpirKernelMetadata->addAttribute("sycl-unique-id", - "_Z20__MsanKernelMetadata"); - MsanSpirKernelMetadata->setDSOLocal(true); + auto KernelName = F.getName(); + auto *KernelNameGV = getOrCreateGlobalString( + M, "__msan_kernel", KernelName, kSpirOffloadConstantAS); + SpirKernelsMetadata.emplace_back(ConstantStruct::get( + StructTy, ConstantExpr::getPointerCast(KernelNameGV, IntptrTy), + ConstantInt::get(IntptrTy, KernelName.size()))); + } + + // Create global variable to record spirv kernels' information + ArrayType *ArrayTy = ArrayType::get(StructTy, SpirKernelsMetadata.size()); + Constant *MetadataInitializer = + ConstantArray::get(ArrayTy, SpirKernelsMetadata); + GlobalVariable *MsanSpirKernelMetadata = new GlobalVariable( + M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage, + MetadataInitializer, "__MsanKernelMetadata", nullptr, + GlobalValue::NotThreadLocal, 1); + MsanSpirKernelMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + // Add device global attributes + MsanSpirKernelMetadata->addAttribute( + "sycl-device-global-size", + std::to_string(DL.getTypeAllocSize(ArrayTy))); + MsanSpirKernelMetadata->addAttribute("sycl-device-image-scope"); + MsanSpirKernelMetadata->addAttribute("sycl-host-access", + "0"); // read only + MsanSpirKernelMetadata->addAttribute("sycl-unique-id", + 
"_Z20__MsanKernelMetadata"); + MsanSpirKernelMetadata->setDSOLocal(true); + } + + // Handle global variables: + // - Skip sanitizing unsupported variables + // - Instrument __MsanDeviceGlobalMetadata for device globals + do { + SmallVector DeviceGlobalMetadata; + + // Device global meta data is described by a structure + // size_t device_global_size + // size_t beginning address of the device global + StructType *StructTy = StructType::get(IntptrTy, IntptrTy); + + for (auto &G : M.globals()) { + if (isUnsupportedDeviceGlobal(&G)) { + for (auto *User : G.users()) + if (auto *Inst = dyn_cast(User)) + Inst->setNoSanitizeMetadata(); + continue; + } + + DeviceGlobalMetadata.push_back(ConstantStruct::get( + StructTy, + ConstantInt::get(IntptrTy, DL.getTypeAllocSize(G.getValueType())), + ConstantExpr::getPointerCast(&G, IntptrTy))); + } + + if (DeviceGlobalMetadata.empty()) + break; + + // Create meta data global to record device globals' information + ArrayType *ArrayTy = ArrayType::get(StructTy, DeviceGlobalMetadata.size()); + Constant *MetadataInitializer = + ConstantArray::get(ArrayTy, DeviceGlobalMetadata); + GlobalVariable *MsanDeviceGlobalMetadata = new GlobalVariable( + M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage, + MetadataInitializer, "__MsanDeviceGlobalMetadata", nullptr, + GlobalValue::NotThreadLocal, 1); + MsanDeviceGlobalMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + } while (false); } PreservedAnalyses MemorySanitizerPass::run(Module &M, @@ -827,7 +887,7 @@ PreservedAnalyses MemorySanitizerPass::run(Module &M, } if (TargetTriple.isSPIROrSPIRV()) { - extendSpirKernelArgs(M); + instrumentSPIRModule(M); Modified = true; } diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_device_global.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_device_global.ll new file mode 100644 index 0000000000000..39c2775a923c2 --- /dev/null +++ 
b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/instrument_device_global.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -passes=msan -msan-instrumentation-with-call-threshold=0 -msan-eager-checks=1 -S | FileCheck %s +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@.str = external addrspace(1) constant [59 x i8] +@__spirv_BuiltInGlobalInvocationId = external addrspace(1) constant <3 x i64> + +; CHECK: @__MsanDeviceGlobalMetadata +; CHECK-NOT: @__spirv_BuiltInGlobalInvocationId +; CHECK-SAME: @.str diff --git a/sycl-jit/test/internalization/promote-private-non-unit-hip.ll b/sycl-jit/test/internalization/promote-private-non-unit-hip.ll index 592987b6ae0d6..2037979b7e89f 100644 --- a/sycl-jit/test/internalization/promote-private-non-unit-hip.ll +++ b/sycl-jit/test/internalization/promote-private-non-unit-hip.ll @@ -1,4 +1,4 @@ -; REQUIRES: hip_amd +; REQUIRES: hip ; RUN: opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext \ ; RUN: -passes=sycl-internalization -S %s | FileCheck %s diff --git a/sycl-jit/test/kernel-fusion/check-failed-remapping-amdgpu.ll b/sycl-jit/test/kernel-fusion/check-failed-remapping-amdgpu.ll index 69b9ab3b7f293..8dd7784902909 100644 --- a/sycl-jit/test/kernel-fusion/check-failed-remapping-amdgpu.ll +++ b/sycl-jit/test/kernel-fusion/check-failed-remapping-amdgpu.ll @@ -1,4 +1,4 @@ -; REQUIRES: hip_amd +; REQUIRES: hip ; RUN: opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext \ ; RUN: -passes=sycl-kernel-fusion -S %s | FileCheck %s diff --git a/sycl-jit/test/kernel-fusion/check-remapping-amdgpu.ll b/sycl-jit/test/kernel-fusion/check-remapping-amdgpu.ll index 52e4710fd0c2d..f142e9d89322b 100644 --- a/sycl-jit/test/kernel-fusion/check-remapping-amdgpu.ll +++ b/sycl-jit/test/kernel-fusion/check-remapping-amdgpu.ll @@ -1,4 +1,4 @@ -; REQUIRES: hip_amd +; REQUIRES: hip ; RUN: opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext \ ; RUN: 
-passes=sycl-kernel-fusion -S %s | FileCheck %s diff --git a/sycl-jit/test/kernel-fusion/check-remapping-interproc-amdgpu.ll b/sycl-jit/test/kernel-fusion/check-remapping-interproc-amdgpu.ll index 7a214eef3b2dd..65a843935933b 100644 --- a/sycl-jit/test/kernel-fusion/check-remapping-interproc-amdgpu.ll +++ b/sycl-jit/test/kernel-fusion/check-remapping-interproc-amdgpu.ll @@ -1,4 +1,4 @@ -; REQUIRES: hip_amd +; REQUIRES: hip ; RUN: opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext \ ; RUN: -passes=sycl-kernel-fusion -S %s | FileCheck %s diff --git a/sycl-jit/test/lit.cfg.py b/sycl-jit/test/lit.cfg.py index fffa59585ef0e..3abb6c1d1b2d6 100644 --- a/sycl-jit/test/lit.cfg.py +++ b/sycl-jit/test/lit.cfg.py @@ -27,4 +27,4 @@ if "NVPTX" in config.llvm_targets_to_build: config.available_features.add("cuda") if "AMDGPU" in config.llvm_targets_to_build: - config.available_features.add("hip_amd") + config.available_features.add("hip") diff --git a/sycl-jit/test/materializer/basic.ll b/sycl-jit/test/materializer/basic.ll index 524322116a384..2885eb77bde78 100644 --- a/sycl-jit/test/materializer/basic.ll +++ b/sycl-jit/test/materializer/basic.ll @@ -1,4 +1,4 @@ -; RUN: %if hip_amd %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ +; RUN: %if hip %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ ; RUN: --mtriple amdgcn-amd-amdhsa -passes=sycl-spec-const-materializer -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER %s %} @@ -6,7 +6,7 @@ ; RUN: --mtriple nvptx64-nvidia-cuda -passes=sycl-spec-const-materializer -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER %s %} -; RUN: %if hip_amd %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ +; RUN: %if hip %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ ; RUN: --mtriple amdgcn-amd-amdhsa -passes=sycl-spec-const-materializer,early-cse,adce -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER-CSE %s %} diff --git a/sycl-jit/test/materializer/multi_type.ll 
b/sycl-jit/test/materializer/multi_type.ll index f69bd057748e0..65204e3f12e64 100644 --- a/sycl-jit/test/materializer/multi_type.ll +++ b/sycl-jit/test/materializer/multi_type.ll @@ -1,4 +1,4 @@ -; RUN: %if hip_amd %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ +; RUN: %if hip %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ ; RUN: --mtriple amdgcn-amd-amdhsa -passes=sycl-spec-const-materializer -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER %s %} @@ -6,7 +6,7 @@ ; RUN: --mtriple nvptx64-nvidia-cuda -passes=sycl-spec-const-materializer -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER %s %} -; RUN: %if hip_amd %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ +; RUN: %if hip %{ opt -load-pass-plugin %shlibdir/SYCLKernelJIT%shlibext\ ; RUN: --mtriple amdgcn-amd-amdhsa -passes=sycl-spec-const-materializer,early-cse -S %s |\ ; RUN: FileCheck --check-prefix=CHECK-MATERIALIZER-CSE %s %} diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index de4a5aa14251b..526683c9cdf97 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,8 +1,7 @@ -# commit 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f -# Merge: 9c652ffb b78cfa71 -# Author: Ross Brunton -# Date: Thu Jan 9 17:28:00 2025 +0000 -# Merge pull request #2048 from RossBrunton/ross/refc -# -# Use reference counting on factories -set(UNIFIED_RUNTIME_TAG 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f) +# commit 9e48f543b8dd39d45563169433bb529583625dfe +# Merge: 6a3fece6 1a1108b3 +# Author: Martin Grant +# Date: Wed Jan 15 14:33:29 2025 +0000 +# Merge pull request #2540 from martygrant/martin/program-info-unswitch +# Move urProgramGetInfo success test from a switch to individual tests. 
+set(UNIFIED_RUNTIME_TAG 9e48f543b8dd39d45563169433bb529583625dfe) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_current_device.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_current_device.asciidoc new file mode 100755 index 0000000000000..d50271681385a --- /dev/null +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_current_device.asciidoc @@ -0,0 +1,140 @@ += sycl_ext_oneapi_current_device + + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2024 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 9 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + +== Status + +This is a proposed extension specification, intended to gather community +feedback. Interfaces defined in this specification may not be implemented yet +or may be in a preliminary state. The specification itself may also change in +incompatible ways before it is finalized. *Shipping software products should +not rely on APIs defined in this specification.* + +== Overview + +This extension introduces additional state into SYCL in order to simplify +programming for developers. 
The extension provides a mechanism to both set and +query the 'current' per-thread `sycl::device`. By adding the notion of a 'current' +device, this can simplify interfaces and reduce the amount of boilerplate code +required to write a SYCL application. + +Since this function relates to the environment of the calling thread, +it is the user's responsibility to ensure that it is called by the correct thread. +For example, it is unsafe to call this function inside of a host task, within an +asynchronous error handler, or within other functions that may be executed +asynchronously, since these operations are not guaranteed to execute on any +specific thread. + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_CURRENT_DEVICE` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +=== New free functions + +This extension adds the following new free functions: + + +''' + +[frame=all,grid=none,separator="@"] +!==== +a@ +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::this_thread { + +sycl::device get_current_device(); + +} // namespace sycl::ext::oneapi::experimental::this_thread +---- +!==== + +_Returns:_ The current default device for the calling host thread. If +`set_current_device()` has not been called by this thread, returns the +device selected by the default device selector. + +_Preconditions:_ The function is called from a host thread, executing +outside of a host task or an asynchronous error handler. 
+ +''' + +[frame=all,grid=none,separator="@"] +!==== +a@ +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::this_thread { + +void set_current_device(sycl::device dev); + +} // namespace sycl::ext::oneapi::experimental::this_thread +---- +!==== + +_Effects:_ Sets the current default device to `dev` for the calling host thread. + +_Preconditions:_ The function is called from a host thread, executing outside +of a host task or an asynchronous error handler. + +== Issues +. [RESOLVED] Should the current device be global or should we also support a per-thread + device? Answer: It should be per-thread to align with the behavior of other programming + models. +. [OPEN] We want to add a default queue per device. Should this queue be in-order or out-of-order? + Do we want to allow the user to specify this? \ No newline at end of file diff --git a/sycl/include/sycl/access/access.hpp b/sycl/include/sycl/access/access.hpp index e5c8670e7fda8..3f0049d1c9950 100644 --- a/sycl/include/sycl/access/access.hpp +++ b/sycl/include/sycl/access/access.hpp @@ -350,15 +350,6 @@ address_space_cast_is_possible(access::address_space Src, template auto static_address_cast(ElementType *Ptr) { - constexpr auto generic_space = access::address_space::generic_space; - constexpr auto global_space = access::address_space::global_space; - constexpr auto local_space = access::address_space::local_space; - constexpr auto private_space = access::address_space::private_space; - constexpr auto global_device = - access::address_space::ext_intel_global_device_space; - constexpr auto global_host = - access::address_space::ext_intel_global_host_space; - constexpr auto SrcAS = deduce_AS::value; static_assert(address_space_cast_is_possible(SrcAS, Space)); @@ -367,31 +358,7 @@ auto static_address_cast(ElementType *Ptr) { // Note: reinterpret_cast isn't enough for some of the casts between different // address spaces, use C-style cast instead. 
-#if !defined(__SPIR__) return (dst_type)Ptr; -#else - if constexpr (SrcAS != generic_space) { - return (dst_type)Ptr; - } else if constexpr (Space == global_space) { - return (dst_type)__spirv_GenericCastToPtr_ToGlobal( - Ptr, __spv::StorageClass::CrossWorkgroup); - } else if constexpr (Space == local_space) { - return (dst_type)__spirv_GenericCastToPtr_ToLocal( - Ptr, __spv::StorageClass::Workgroup); - } else if constexpr (Space == private_space) { - return (dst_type)__spirv_GenericCastToPtr_ToPrivate( - Ptr, __spv::StorageClass::Function); -#if !defined(__ENABLE_USM_ADDR_SPACE__) - } else if constexpr (Space == global_device || Space == global_host) { - // If __ENABLE_USM_ADDR_SPACE__ isn't defined then both - // global_device/global_host are just aliases for global_space. - return (dst_type)__spirv_GenericCastToPtr_ToGlobal( - Ptr, __spv::StorageClass::CrossWorkgroup); -#endif - } else { - return (dst_type)Ptr; - } -#endif } // Previous implementation (`castAS`, used in `multi_ptr` ctors among other @@ -427,14 +394,13 @@ auto dynamic_address_cast(ElementType *Ptr) { #if defined(__ENABLE_USM_ADDR_SPACE__) static_assert(SupressNotImplementedAssert || Space != Space, "Not supported yet!"); - return static_address_cast(Ptr); + return detail::static_address_cast(Ptr); #else // If __ENABLE_USM_ADDR_SPACE__ isn't defined then both // global_device/global_host are just aliases for global_space. 
static_assert(std::is_same_v); return (dst_type)Ptr; #endif -#if defined(__SPIR__) } else if constexpr (Space == global_space) { return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( Ptr, __spv::StorageClass::CrossWorkgroup); @@ -449,12 +415,11 @@ auto dynamic_address_cast(ElementType *Ptr) { (Space == global_device || Space == global_host)) { return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( Ptr, __spv::StorageClass::CrossWorkgroup); -#endif #endif } else { static_assert(SupressNotImplementedAssert || Space != Space, "Not supported yet!"); - return static_address_cast(Ptr); + return detail::static_address_cast(Ptr); } } #else // __SYCL_DEVICE_ONLY__ diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 4bae5c59bb6bb..d8afc90b48d85 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -66,7 +66,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, auto Device = detail::createSyclObjFromImpl(DeviceImpl); ur_program_handle_t Program = detail::ProgramManager::getInstance().createURProgram( - **DeviceImage, Context, {Device}); + **DeviceImage, Context, {std::move(Device)}); return {*DeviceImage, Program}; } diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index 48ef6e15b6fce..9346461c9229f 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -208,21 +208,23 @@ class PersistentDeviceCodeCache { const ur_program_handle_t &NativePrg); /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ - static void trace(const std::string &msg, std::string path = "") { + static void trace(const std::string &msg, const std::string &path = "") { static const bool traceEnabled = SYCLConfig::isTraceDiskCache(); if (traceEnabled) { - std::replace(path.begin(), path.end(), '\\', '/'); - std::cerr << "[Persistent Cache]: " << msg 
<< path << std::endl; + auto outputPath = path; + std::replace(outputPath.begin(), outputPath.end(), '\\', '/'); + std::cerr << "[Persistent Cache]: " << msg << outputPath << std::endl; } } static void trace_KernelCompiler(const std::string &msg, - std::string path = "") { + const std::string &path = "") { static const bool traceEnabled = SYCLConfig::isTraceKernelCompiler(); if (traceEnabled) { - std::replace(path.begin(), path.end(), '\\', '/'); - std::cerr << "[kernel_compiler Persistent Cache]: " << msg << path + auto outputPath = path; + std::replace(outputPath.begin(), outputPath.end(), '\\', '/'); + std::cerr << "[kernel_compiler Persistent Cache]: " << msg << outputPath << std::endl; } } diff --git a/sycl/test-e2e/Adapters/enqueue-arg-order-buffer.cpp b/sycl/test-e2e/Adapters/enqueue-arg-order-buffer.cpp index 468069e275a29..07fd78194ef71 100644 --- a/sycl/test-e2e/Adapters/enqueue-arg-order-buffer.cpp +++ b/sycl/test-e2e/Adapters/enqueue-arg-order-buffer.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip_nvidia // RUN: %{build} -Wno-error=deprecated-declarations -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/AddressCast/dynamic_address_cast.cpp b/sycl/test-e2e/AddressCast/dynamic_address_cast.cpp index 441fe486564b3..3bdefc533ea5b 100644 --- a/sycl/test-e2e/AddressCast/dynamic_address_cast.cpp +++ b/sycl/test-e2e/AddressCast/dynamic_address_cast.cpp @@ -7,9 +7,8 @@ //===----------------------------------------------------------------------===// // Issue with OpenCL CPU runtime implementation of OpGenericCastToPtrExplicit -// OpGenericCastToPtr* intrinsics not implemented on AMD or NVIDIA // FPGA emulator affected by same issue as OpenCL CPU runtime -// UNSUPPORTED: cpu, hip, cuda, accelerator +// UNSUPPORTED: cpu, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/AddressSanitizer/lit.local.cfg b/sycl/test-e2e/AddressSanitizer/lit.local.cfg index d768697d07f6d..233ba3789467e 100644 
--- a/sycl/test-e2e/AddressSanitizer/lit.local.cfg +++ b/sycl/test-e2e/AddressSanitizer/lit.local.cfg @@ -8,5 +8,8 @@ config.substitutions.append( ("%force_device_asan_rt", "env UR_ENABLE_LAYERS=UR_LAYER_ASAN") ) +if "-fsanitize=memory" in config.cxx_flags: + config.unsupported=True + # https://github.com/intel/llvm/issues/15953 config.unsupported_features += ['gpu-intel-gen12'] diff --git a/sycl/test-e2e/Assert/assert_in_kernels_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_kernels_ndebug.cpp index 4ca45de3d54f3..7c02b92f0aad9 100644 --- a/sycl/test-e2e/Assert/assert_in_kernels_ndebug.cpp +++ b/sycl/test-e2e/Assert/assert_in_kernels_ndebug.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -DNDEBUG %S/assert_in_kernels.cpp -o %t.out +// RUN: %clangxx -fsycl %{sycl_target_opts} -DNDEBUG %S/assert_in_kernels.cpp -o %t.out // RUN: %{run} %t.out | FileCheck %s // // CHECK-NOT: One shouldn't see this message diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp index 8ffea706d4f58..3b66660b8c2b5 100644 --- a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp +++ b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp @@ -9,7 +9,7 @@ // XFAIL: (opencl && gpu) // XFAIL-TRACKER: https://github.com/intel/llvm/issues/11364 -// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%{sycl_triple} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl %{sycl_target_opts} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out // Shouldn't fail on ACC as fallback assert isn't enqueued there // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt %if fpga %{ --check-prefix=CHECK-ACC %} // diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug_win.cpp 
b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug_win.cpp index 2cbc05540fa69..9e02e01681190 100644 --- a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug_win.cpp +++ b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug_win.cpp @@ -1,5 +1,5 @@ // REQUIRES: windows -// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%{sycl_triple} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl %{sycl_target_opts} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out // Shouldn't fail on ACC as fallback assert isn't enqueued there // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt %if fpga %{ --check-prefix=CHECK-ACC %} // diff --git a/sycl/test-e2e/Assert/assert_in_one_kernel_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_one_kernel_ndebug.cpp index 1f68c39e08438..0180ec3752ec6 100644 --- a/sycl/test-e2e/Assert/assert_in_one_kernel_ndebug.cpp +++ b/sycl/test-e2e/Assert/assert_in_one_kernel_ndebug.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -DNDEBUG %S/assert_in_one_kernel.cpp -o %t.out +// RUN: %clangxx -fsycl %{sycl_target_opts} -DNDEBUG %S/assert_in_one_kernel.cpp -o %t.out // RUN: %{run} %t.out | FileCheck %s // // CHECK-NOT: from assert statement diff --git a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp index 731187fe4cdf7..e7f419cedf7e7 100644 --- a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp +++ b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp @@ -5,7 +5,7 @@ // XFAIL: (opencl && gpu) // XFAIL-TRACKER: https://github.com/intel/llvm/issues/11364 // -// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%{sycl_triple} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs 
%S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl %{sycl_target_opts} -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib // RUN: %if cpu %{ %{run} %t.out &> %t.cpu.txt ; FileCheck %s --input-file %t.cpu.txt %} // // Since this is a multi-threaded application enable memory tracking and diff --git a/sycl/test-e2e/AtomicRef/assignment_atomic64_generic.cpp b/sycl/test-e2e/AtomicRef/assignment_atomic64_generic.cpp index 1469e40139e07..c0bfa74ad33d5 100644 --- a/sycl/test-e2e/AtomicRef/assignment_atomic64_generic.cpp +++ b/sycl/test-e2e/AtomicRef/assignment_atomic64_generic.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15791 #include "assignment.h" diff --git a/sycl/test-e2e/AtomicRef/exchange.cpp b/sycl/test-e2e/AtomicRef/exchange.cpp index 0252142480c52..7e405689c65c8 100644 --- a/sycl/test-e2e/AtomicRef/exchange.cpp +++ b/sycl/test-e2e/AtomicRef/exchange.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15791 #include "exchange.h" diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index 9c69e0cd7bf71..7d58e048519f4 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -5,11 +5,11 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} +// 
RUN: %clangxx -fsycl %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} // RUN: %{run} %t.out // Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA ) -// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} #include "bfloat16_builtins.hpp" diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index 6db30932609f5..719bf4709ae4c 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -7,7 +7,7 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // If CUDA, test "new" again for sm_75/generic -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} +// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} // RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} #include "bfloat16_builtins.hpp" diff --git a/sycl/test-e2e/Basic/built-ins.cpp b/sycl/test-e2e/Basic/built-ins.cpp index 
e10cf7ba8a08a..5967e7837d505 100644 --- a/sycl/test-e2e/Basic/built-ins.cpp +++ b/sycl/test-e2e/Basic/built-ins.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t_var.out | FileCheck %s // Hits an assertion and kernel page fault with AMD: -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/14404 #include diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 817b6c46b087b..3b015051377c9 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -2,8 +2,7 @@ // RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // // TODO: Behaviour is unstable for level zero on Windows. Enable when fixed. -// TODO: The test is sporadically fails on CUDA. Enable when fixed. -// UNSUPPORTED: (windows && level_zero) || hip_nvidia +// UNSUPPORTED: (windows && level_zero) #define SYCL2020_DISABLE_DEPRECATION_WARNINGS diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp index 7e30406189e29..afe3ebd0d2557 100644 --- a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -58,17 +58,15 @@ template struct KernelFunctorWithMaxWGSizeProp { } }; -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { +template +int test(queue &Q, KernelType KernelFunc) { constexpr size_t Dims = 1; // Positive test case: Specify local size that matches required size. try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(8), range(I)), Props, - KernelFunc); + nd_range(repeatRange(8), range(I)), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -81,8 +79,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. 
try { Q.parallel_for>( - nd_range(repeatRange(8), range(I)), Props, - KernelFunc); + nd_range(repeatRange(8), range(I)), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr @@ -97,7 +94,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -109,7 +106,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.parallel_for>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr << "Test case MaxLinearWGSizeNoLocalPositive shortcut failed: " @@ -122,7 +119,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); }); Q.wait_and_throw(); @@ -147,7 +144,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. 
try { Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); Q.wait_and_throw(); std::cerr @@ -174,17 +171,10 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { } template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_linear_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; KernelFunctorWithMaxWGSizeProp KernelFunctor; int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); + Res += test(Q, KernelFunctor); return Res; } diff --git a/sycl/test-e2e/Basic/max_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_work_group_size_props.cpp index 6376aa0a10392..6694cb1d35d3f 100644 --- a/sycl/test-e2e/Basic/max_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_work_group_size_props.cpp @@ -49,17 +49,15 @@ template struct KernelFunctorWithMaxWGSizeProp { } }; -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { +template +int test(queue &Q, KernelType KernelFunc) { constexpr size_t Dims = sizeof...(Is); // Positive test case: Specify local size that matches required size. try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -71,8 +69,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. 
try { Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr << "Test case MaxWGSizePositive shortcut failed: unexpected " @@ -86,7 +83,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -98,7 +95,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.parallel_for>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr << "Test case MaxWGSizeNoLocalPositive shortcut failed: " @@ -111,7 +108,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); }); Q.wait_and_throw(); @@ -134,7 +131,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. 
try { Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); Q.wait_and_throw(); std::cerr << "Test case MaxWGSizeNegative shortcut failed: no exception " @@ -159,17 +156,10 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { } template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; KernelFunctorWithMaxWGSizeProp KernelFunctor; int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); + Res += test(Q, KernelFunctor); return Res; } diff --git a/sycl/test-e2e/Basic/multisource.cpp b/sycl/test-e2e/Basic/multisource.cpp index 23c95ce2eddd3..db7b4dbf39e74 100644 --- a/sycl/test-e2e/Basic/multisource.cpp +++ b/sycl/test-e2e/Basic/multisource.cpp @@ -9,14 +9,14 @@ // Separate kernel sources and host code sources // RUN: %{build} -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL // RUN: %{build} -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.kernel.o %t.main.o -Wno-unused-command-line-argument -o %t1.fat +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.kernel.o %t.main.o -Wno-unused-command-line-argument -o %t1.fat // RUN: %{run} %t1.fat // Multiple sources with kernel code // RUN: %{build} -c -o %t.init.o -DINIT_KERNEL // RUN: %{build} -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t2.fat +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t2.fat // RUN: %{run} %t2.fat #include diff --git a/sycl/test-e2e/Basic/multisource_spv_obj.cpp b/sycl/test-e2e/Basic/multisource_spv_obj.cpp index 
0f097ce3cd5db..25ff92eda2c77 100644 --- a/sycl/test-e2e/Basic/multisource_spv_obj.cpp +++ b/sycl/test-e2e/Basic/multisource_spv_obj.cpp @@ -11,21 +11,21 @@ // Separate kernel sources and host code sources // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.kernel.o %t.main.o -Wno-unused-command-line-argument -o %t1.fat +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.kernel.o %t.main.o -Wno-unused-command-line-argument -o %t1.fat // RUN: %{run} %t1.fat // Multiple sources with kernel code // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.init.o -DINIT_KERNEL // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t2.fat +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t2.fat // RUN: %{run} %t2.fat // Multiple sources with kernel code, mixed SPIR-V and LLVM-IR objects // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.init.o -DINIT_KERNEL // RUN: %{build} -fsycl-device-obj=llvmir -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t3.fat +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.init.o %t.calc.o %t.main.o -Wno-unused-command-line-argument -o %t3.fat // RUN: %{run} %t3.fat #include diff --git a/sycl/test-e2e/Basic/work_group_size_prop.cpp b/sycl/test-e2e/Basic/work_group_size_prop.cpp index ac8400dcc31b8..9cf04c4d2ea66 100644 --- a/sycl/test-e2e/Basic/work_group_size_prop.cpp +++ b/sycl/test-e2e/Basic/work_group_size_prop.cpp @@ -45,9 +45,8 @@ template struct KernelFunctorWithWGSizeProp { } }; 
-template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { +template +int test(queue &Q, KernelType KernelFunc) { constexpr size_t Dims = sizeof...(Is); bool IsOpenCL = (Q.get_backend() == backend::opencl); @@ -56,8 +55,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -69,8 +67,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. try { Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " @@ -87,7 +84,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { Q.submit([&](handler &CGH) { CGH.parallel_for< ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { @@ -99,7 +96,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.parallel_for>( - repeatRange(16), Props, KernelFunc); + repeatRange(16), KernelFunc); Q.wait_and_throw(); } catch (exception &E) { std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " @@ -113,7 +110,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { try { Q.submit([&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); }); Q.wait_and_throw(); @@ -137,7 +134,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. 
try { Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); Q.wait_and_throw(); std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " @@ -162,17 +159,10 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { } template int test(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; KernelFunctorWithWGSizeProp KernelFunctor; int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); + Res += test(Q, KernelFunctor); return Res; } diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp index 88f55d00aa903..6076834aac650 100644 --- a/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp +++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp @@ -1,6 +1,6 @@ // REQUIRES: arch-intel_gpu_pvc, ocloc -// XFAIL: arch-intel_gpu_pvc +// XFAIL: arch-intel_gpu_pvc && opencl && igc-dev // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16401 // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc %s -o %t.out diff --git a/sycl/test-e2e/DeviceCodeSplit/grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp index 1e5b085d207d6..3642483e52566 100644 --- a/sycl/test-e2e/DeviceCodeSplit/grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -14,7 +14,7 @@ // compiler option // REQUIRES: arch-intel_gpu_pvc -// XFAIL: arch-intel_gpu_pvc +// XFAIL: arch-intel_gpu_pvc && opencl // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16401 // RUN: %{build} -Wno-error=deprecated-declarations -o %t1.out diff --git a/sycl/test-e2e/DeviceGlobal/device_global_static.cpp 
b/sycl/test-e2e/DeviceGlobal/device_global_static.cpp index 363c716b9d98a..75c7fc165016d 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_static.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_static.cpp @@ -4,7 +4,7 @@ // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15329 // // Tests static device_global access through device kernels. diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp index 74758a837cd46..f0de42b7f13f9 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: cuda // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. -// XFAIL: hip_amd +// XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742 #include diff --git a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp index 5952e4e418935..1bdaf3b1d6270 100644 --- a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp @@ -10,7 +10,7 @@ // RUN: %clangxx %{dynamic_lib_options} %S/Inputs/b.cpp %if windows %{%T/libdevice_c.lib%} -o %T/libdevice_b.%{dynamic_lib_suffix} // RUN: %clangxx %{dynamic_lib_options} %S/Inputs/a.cpp %if windows %{%T/libdevice_b.lib%} -o %T/libdevice_a.%{dynamic_lib_suffix} -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -fsycl-allow-device-image-dependencies -fsycl-device-code-split=per_kernel %S/Inputs/basic.cpp -o %t.out \ +// RUN: %clangxx -fsycl %{sycl_target_opts} -fsycl-allow-device-image-dependencies -fsycl-device-code-split=per_kernel %S/Inputs/basic.cpp -o %t.out \ // RUN: %if windows \ // RUN: %{%T/libdevice_a.lib%} \ // 
RUN: %else \ diff --git a/sycl/test-e2e/DeviceImageDependencies/free_function_kernels.cpp b/sycl/test-e2e/DeviceImageDependencies/free_function_kernels.cpp index 5c50f8430ad78..40862c5dc6ad9 100644 --- a/sycl/test-e2e/DeviceImageDependencies/free_function_kernels.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/free_function_kernels.cpp @@ -7,7 +7,7 @@ // The name mangling for free function kernels currently does not work with PTX. // UNSUPPORTED: cuda -// XFAIL: hip_amd +// XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742 #include diff --git a/sycl/test-e2e/DeviceImageDependencies/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/objects.cpp index 17409b209781c..eea085dc9b905 100644 --- a/sycl/test-e2e/DeviceImageDependencies/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/objects.cpp @@ -6,5 +6,5 @@ // RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o // RUN: %clangxx -fsycl %S/Inputs/c.cpp -I %S/Inputs -c -o %t_c.o // RUN: %clangxx -fsycl %S/Inputs/d.cpp -I %S/Inputs -c -o %t_d.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -fsycl-device-code-split=per_kernel -fsycl-allow-device-image-dependencies %t_a.o %t_b.o %t_c.o %t_d.o %S/Inputs/basic.cpp -o %t.out +// RUN: %clangxx -fsycl %{sycl_target_opts} -fsycl-device-code-split=per_kernel -fsycl-allow-device-image-dependencies %t_a.o %t_b.o %t_c.o %t_d.o %S/Inputs/basic.cpp -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ESIMD/named_barriers/loop_extended.cpp b/sycl/test-e2e/ESIMD/named_barriers/loop_extended.cpp index b42c58181ca4c..285c62c185e12 100644 --- a/sycl/test-e2e/ESIMD/named_barriers/loop_extended.cpp +++ b/sycl/test-e2e/ESIMD/named_barriers/loop_extended.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// // REQUIRES: arch-intel_gpu_pvc +// UNSUPPORTED: arch-intel_gpu_pvc +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16598 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff 
--git a/sycl/test-e2e/Graph/Inputs/work_group_size_prop.cpp b/sycl/test-e2e/Graph/Inputs/work_group_size_prop.cpp index e052ab5acb3bf..7fd3d8eef1856 100644 --- a/sycl/test-e2e/Graph/Inputs/work_group_size_prop.cpp +++ b/sycl/test-e2e/Graph/Inputs/work_group_size_prop.cpp @@ -40,9 +40,8 @@ template struct KernelFunctorWithWGSizeProp { } }; -template -int test(queue &Queue, PropertiesT Props, KernelType KernelFunc) { +template +int test(queue &Queue, KernelType KernelFunc) { constexpr size_t Dims = sizeof...(Is); // Positive test case: Specify local size that matches required size. @@ -52,15 +51,13 @@ int test(queue &Queue, PropertiesT Props, KernelType KernelFunc) { add_node(Graph, Queue, [&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); }); #ifdef GRAPH_E2E_RECORD_REPLAY Graph.begin_recording(Queue); Queue.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); + nd_range(repeatRange(8), range(Is...)), KernelFunc); Graph.end_recording(Queue); #endif @@ -83,7 +80,7 @@ int test(queue &Queue, PropertiesT Props, KernelType KernelFunc) { try { add_node(GraphN, Queue, [&](handler &CGH) { CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); }); auto ExecGraph = GraphN.finalize(); @@ -119,7 +116,7 @@ int test(queue &Queue, PropertiesT Props, KernelType KernelFunc) { GraphN.begin_recording(Queue); Queue.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, + nd_range(repeatRange(16), repeatRange(8)), KernelFunc); GraphN.end_recording(Queue); @@ -156,17 +153,10 @@ int test(queue &Queue, PropertiesT Props, KernelType KernelFunc) { } template int test(queue &Queue) { - auto Props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = 
sycl::ext::oneapi::experimental::properties{}; KernelFunctorWithWGSizeProp KernelFunctor; int Res = 0; - Res += test(Queue, Props, KernelFunction); - Res += test(Queue, EmptyProps, KernelFunctor); - Res += test(Queue, Props, KernelFunctor); + Res += test(Queue, KernelFunctor); return Res; } diff --git a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp index 4d6aa6445cd0e..194d098aec8c8 100644 --- a/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp +++ b/sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp @@ -20,9 +20,11 @@ int main() { // Use a large N to try and make the kernel slow const size_t N = 1 << 16; - // Loop inside kernel to make even slower (too large N runs out of memory) - const size_t NumKernelLoops = 4; - const size_t NumSubmitLoops = 8; + + // Reduce amount of work compared to version of test without free functions + // due to CMPLRLLVM-64841 + const size_t NumKernelLoops = 1; + const size_t NumSubmitLoops = 1; exp_ext::command_graph Graph{Ctxt, Queue.get_device()}; diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ordering.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ordering.cpp index fdd5ffa52fe9b..5a9de103053eb 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ordering.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ordering.cpp @@ -18,7 +18,7 @@ int main() { const size_t N = 1 << 16; // Loop inside kernel to make even slower (too large N runs out of memory) const size_t NumKernelLoops = 4; - const size_t NumSubmitLoops = 8; + const size_t NumSubmitLoops = 2; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 2e50634fd21c8..257b5a4e4457f 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ 
b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -2,7 +2,10 @@ // XFAIL: (opencl && !cpu && !accelerator) // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641 -// RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() and +// remove the flag once the issue is resolved. +// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // Disabled temporarily while investigation into the failure is ongoing. @@ -42,9 +45,14 @@ void testQueriesAndProperties() { .ext_oneapi_get_info( q, wgRange, wgRange.size() * sizeof(int)); - const auto props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::use_root_sync}; - q.single_task(props, []() {}); + struct TestKernel0 { + void operator()() const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::use_root_sync}; + } + }; + q.single_task(TestKernel0{}); static auto check_max_num_work_group_sync = [](auto Result) { static_assert(std::is_same_v, size_t>, @@ -99,6 +107,32 @@ void testRootGroup() { } } +template struct TestKernel2 { + T m_testResults; + TestKernel2(T &testResults_) : m_testResults(testResults_) {} + void operator()(sycl::nd_item<1> it) const { + const auto root = it.ext_oneapi_get_root_group(); + if (root.leader() || root.get_local_id() == 3) { + m_testResults[0] = root.get_group_id() == sycl::id<1>(0); + m_testResults[1] = root.leader() ?
root.get_local_id() == sycl::id<1>(0) + : root.get_local_id() == sycl::id<1>(3); + m_testResults[2] = root.get_group_range() == sycl::range<1>(1); + m_testResults[3] = root.get_local_range() == it.get_global_range(); + m_testResults[4] = root.get_max_local_range() == root.get_local_range(); + m_testResults[5] = root.get_group_linear_id() == 0; + m_testResults[6] = + root.get_local_linear_id() == root.get_local_id().get(0); + m_testResults[7] = root.get_group_linear_range() == 1; + m_testResults[8] = + root.get_local_linear_range() == root.get_local_range().size(); + } + } + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::use_root_sync}; + } +}; + void testRootGroupFunctions() { sycl::queue q; const auto bundle = @@ -109,34 +143,13 @@ void testRootGroupFunctions() { .ext_oneapi_get_info( q, WorkGroupSize, 0); - const auto props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::use_root_sync}; - constexpr int testCount = 9; sycl::buffer testResultsBuf{sycl::range{testCount}}; const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize}; q.submit([&](sycl::handler &h) { sycl::accessor testResults{testResultsBuf, h}; - h.parallel_for( - range, props, [=](sycl::nd_item<1> it) { - const auto root = it.ext_oneapi_get_root_group(); - if (root.leader() || root.get_local_id() == 3) { - testResults[0] = root.get_group_id() == sycl::id<1>(0); - testResults[1] = root.leader() - ? 
root.get_local_id() == sycl::id<1>(0) - : root.get_local_id() == sycl::id<1>(3); - testResults[2] = root.get_group_range() == sycl::range<1>(1); - testResults[3] = root.get_local_range() == it.get_global_range(); - testResults[4] = - root.get_max_local_range() == root.get_local_range(); - testResults[5] = root.get_group_linear_id() == 0; - testResults[6] = - root.get_local_linear_id() == root.get_local_id().get(0); - testResults[7] = root.get_group_linear_range() == 1; - testResults[8] = - root.get_local_linear_range() == root.get_local_range().size(); - } - }); + h.parallel_for(range, + TestKernel2(testResults)); }); sycl::host_accessor testResults{testResultsBuf}; for (int i = 0; i < testCount; i++) { diff --git a/sycl/test-e2e/HierPar/hier_par_wgscope.cpp b/sycl/test-e2e/HierPar/hier_par_wgscope.cpp index e950055641770..dc709664e53e0 100644 --- a/sycl/test-e2e/HierPar/hier_par_wgscope.cpp +++ b/sycl/test-e2e/HierPar/hier_par_wgscope.cpp @@ -3,7 +3,7 @@ // RUN: %{run} %t.out // // Test hangs on AMD -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip //==- hier_par_wgscope.cpp --- hierarchical parallelism test for WG scope---==// // diff --git a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp index a1d2ec9220763..0bc3d9624f749 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: cuda || hip_nvidia +// UNSUPPORTED: cuda // REQUIRES: gpu,linux,sg-16 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/InlineAsm/asm_8_empty.cpp b/sycl/test-e2e/InlineAsm/asm_8_empty.cpp index 4a690b3088b51..a87704672680b 100644 --- a/sycl/test-e2e/InlineAsm/asm_8_empty.cpp +++ b/sycl/test-e2e/InlineAsm/asm_8_empty.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: cuda || hip_nvidia +// UNSUPPORTED: cuda // REQUIRES: gpu,linux,sg-8 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp 
b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index bd30efe1b217c..a0b4945c769b5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -14,7 +14,7 @@ // Waiting for the commit in IGC to be pulled into the driver to resolve the // test. -// XFAIL: (!igc-dev || gpu-intel-dg2) && run-mode +// XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 #include "common.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp index ff30d4c40f6a7..8643bc8286280 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp @@ -14,7 +14,7 @@ // Waiting for the commit in IGC to be pulled into the driver to resolve the // test. -// XFAIL: (!igc-dev || gpu-intel-dg2) && run-mode +// XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 #include "common.hpp" diff --git a/sycl/test-e2e/MemorySanitizer/check_buffer_host_ptr.cpp b/sycl/test-e2e/MemorySanitizer/check_buffer_host_ptr.cpp deleted file mode 100644 index 4b287a8bb0063..0000000000000 --- a/sycl/test-e2e/MemorySanitizer/check_buffer_host_ptr.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// REQUIRES: linux, cpu || (gpu && level_zero) -// RUN: %{build} %device_msan_flags -O1 -g -o %t2.out -// RUN: %{run} not %t2.out 2>&1 | FileCheck %s -// RUN: %{build} %device_msan_flags -O2 -g -o %t3.out -// RUN: %{run} not %t3.out 2>&1 | FileCheck %s - -#include - -__attribute__((noinline)) long long foo(int data1, long long data2) { - return data1 + data2; -} - -int main() { - sycl::queue q; - int data1[1]; - long long data2[1]; - - { - sycl::buffer buf1(data1, sycl::range<1>(1)); - sycl::buffer buf2(data2, sycl::range<1>(1)); - q.submit([&](sycl::handler &h) { - auto array1 = buf1.get_access(h); - auto array2 = buf2.get_access(h); - 
h.single_task( - [=]() { array1[0] = foo(array1[0], array2[0]); }); - }).wait(); - // CHECK: use-of-uninitialized-value - // CHECK: kernel <{{.*MyKernel}}> - // CHECK: #0 {{.*}} {{.*check_buffer_host_ptr.cpp}}:[[@LINE-4]] - } - - return 0; -} diff --git a/sycl/test-e2e/MemorySanitizer/check_buffer_memset_memcpy.cpp b/sycl/test-e2e/MemorySanitizer/check_buffer_memset_memcpy.cpp new file mode 100644 index 0000000000000..a1f676a1933ef --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/check_buffer_memset_memcpy.cpp @@ -0,0 +1,64 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -O0 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -O2 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s + +#include + +__attribute__((noinline)) int foo(int data1, int data2) { + return data1 + data2; +} + +void check_memset(sycl::queue &q) { + std::cout << "check_memset" << std::endl; + sycl::buffer buf(sycl::range<1>(2)); + const int Pattern = 0; + + q.submit([&](sycl::handler &h) { + auto array = buf.get_access(h); + h.fill(array, Pattern); + }).wait(); + + q.submit([&](sycl::handler &h) { + auto array = buf.get_access(h); + h.single_task( + [=]() { array[0] = foo(array[0], array[1]); }); + }).wait(); + std::cout << "PASS" << std::endl; + // CHECK-LABEL: check_memset + // CHECK-NOT: use-of-uninitialized-value + // CHECK: PASS +} + +void check_memcpy(sycl::queue &q) { + std::cout << "check_memcpy" << std::endl; + int host[2] = {1, 2}; + sycl::buffer buf1(sycl::range<1>(2)); + sycl::buffer buf2(host, sycl::range<1>(2)); + + q.submit([&](sycl::handler &h) { + auto array1 = buf1.get_access(h); + auto array2 = buf2.get_access(h); + h.copy(array2, array1); + }).wait(); + + q.submit([&](sycl::handler &h) { + auto array = buf1.get_access(h); + h.single_task( + [=]() { array[0] = foo(array[0], array[1]); }); + }).wait(); + std::cout << "PASS" << std::endl; + // CHECK-LABEL: check_memcpy + // CHECK-NOT: 
use-of-uninitialized-value + // CHECK: PASS +} + +int main() { + sycl::queue q; + + check_memset(q); + check_memcpy(q); + + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/check_device_global.cpp b/sycl/test-e2e/MemorySanitizer/check_device_global.cpp new file mode 100644 index 0000000000000..f8b47569deb9b --- /dev/null +++ b/sycl/test-e2e/MemorySanitizer/check_device_global.cpp @@ -0,0 +1,58 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// RUN: %{build} %device_msan_flags -O0 -g -o %t1.out +// RUN: %{run} not %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -O1 -g -o %t2.out +// RUN: %{run} not %t2.out 2>&1 | FileCheck %s +// RUN: %{build} %device_msan_flags -O2 -g -o %t3.out +// RUN: %{run} not %t3.out 2>&1 | FileCheck %s + +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi; +using namespace sycl::ext::oneapi::experimental; + +sycl::ext::oneapi::experimental::device_global< + int[4], decltype(properties(device_image_scope, host_access_read_write))> + dev_global; + +__attribute__((noinline)) int check(int data) { return data + 1; } + +int main() { + sycl::queue Q; + int *array = sycl::malloc_device(4, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + dev_global[0] = 42; + array[0] = check(dev_global[1]); + array[1] = dev_global[1]; + }); + }).wait(); + + int val[4]; + Q.copy(dev_global, val).wait(); + assert(val[0] == 42); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + array[0] = check(array[1]); + dev_global[1] = array[2]; // uninitialized value + }); + }).wait(); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + array[0] = dev_global[1]; + check(array[0]); + }); + }).wait(); + // CHECK: use-of-uninitialized-value + // CHECK-NEXT: kernel <{{.*Test3}}> + + sycl::free(array, Q); + + return 0; +} diff --git a/sycl/test-e2e/MemorySanitizer/lit.local.cfg b/sycl/test-e2e/MemorySanitizer/lit.local.cfg index f9437ee4a9048..dcc385637d410 100644 --- 
a/sycl/test-e2e/MemorySanitizer/lit.local.cfg +++ b/sycl/test-e2e/MemorySanitizer/lit.local.cfg @@ -1,8 +1,10 @@ # TRACKER: https://github.com/intel/llvm/issues/16184 -# TRACKER for PVC: https://github.com/intel/llvm/issues/16401 -#has_arch_gpu_intel_pvc = any('arch-intel_gpu_pvc' in T for T in config.sycl_dev_features.values()) -#if not has_arch_gpu_intel_pvc: -config.unsupported_features += ['gpu'] +has_arch_gpu_intel_pvc = any('arch-intel_gpu_pvc' in T for T in config.sycl_dev_features.values()) +if not has_arch_gpu_intel_pvc: + config.unsupported_features += ['gpu'] +else: + # TRACKER for PVC + igc-dev: https://github.com/intel/llvm/issues/16401 + config.unsupported_features += ['igc-dev'] config.substitutions.append( ("%device_msan_flags", "-Xarch_device -fsanitize=memory") @@ -10,3 +12,6 @@ config.substitutions.append( config.substitutions.append( ("%force_device_msan_rt", "env UR_ENABLE_LAYERS=UR_LAYER_MSAN") ) + +if "-fsanitize=address" in config.cxx_flags: + config.unsupported=True diff --git a/sycl/test-e2e/NewOffloadDriver/multisource.cpp b/sycl/test-e2e/NewOffloadDriver/multisource.cpp index cf9f518c89995..0612b54bfc23b 100644 --- a/sycl/test-e2e/NewOffloadDriver/multisource.cpp +++ b/sycl/test-e2e/NewOffloadDriver/multisource.cpp @@ -11,7 +11,7 @@ // Test with `--offload-new-driver` // RUN: %{build} --offload-new-driver -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL // RUN: %{build} --offload-new-driver -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.kernel.o %t.main.o -o %t1.fat +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl %{sycl_target_opts} --offload-new-driver %t.kernel.o %t.main.o -o %t1.fat // RUN: %{run} %t1.fat // Multiple sources with kernel code @@ -19,7 +19,7 @@ // RUN: %{build} --offload-new-driver -c -o %t.init.o -DINIT_KERNEL // RUN: %{build} --offload-new-driver -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} 
--offload-new-driver -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.init.o %t.calc.o %t.main.o -o %t2.fat +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl %{sycl_target_opts} --offload-new-driver %t.init.o %t.calc.o %t.main.o -o %t2.fat // RUN: %{run} %t2.fat // Multiple sources with kernel code with old-style objects @@ -27,7 +27,7 @@ // RUN: %{build} --no-offload-new-driver -c -o %t.init.o -DINIT_KERNEL // RUN: %{build} --no-offload-new-driver -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} --no-offload-new-driver -c -o %t.main.o -DMAIN_APP -// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.init.o %t.calc.o %t.main.o -o %t3.fat +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl %{sycl_target_opts} --offload-new-driver %t.init.o %t.calc.o %t.main.o -o %t3.fat // RUN: %{run} %t3.fat // Multiple sources with kernel code with old-style objects in a static archive @@ -36,7 +36,7 @@ // RUN: %{build} --no-offload-new-driver -c -o %t.calc.o -DCALC_KERNEL // RUN: %{build} --no-offload-new-driver -c -o %t.main.o -DMAIN_APP // RUN: llvm-ar r %t.a %t.init.o %t.calc.o -// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t.main.o %t.a -o %t4.fat +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl %{sycl_target_opts} --offload-new-driver %t.main.o %t.a -o %t4.fat // RUN: %{run} %t4.fat #include diff --git a/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp index 144466f673bba..b2659744d0338 100644 --- a/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp +++ b/sycl/test-e2e/NewOffloadDriver/sycl-external-with-optional-features.cpp @@ -1,7 +1,7 @@ // Test with `--offload-new-driver` // RUN: 
%{build} -DSOURCE1 --offload-new-driver -c -o %t1.o // RUN: %{build} -DSOURCE2 --offload-new-driver -c -o %t2.o -// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-targets=%{sycl_triple} --offload-new-driver %t1.o %t2.o -o %t.exe +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl %{sycl_target_opts} --offload-new-driver %t1.o %t2.o -o %t.exe // RUN: %{run} %t.exe // XFAIL: cuda // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16413 diff --git a/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp b/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp index 35430c7b12ff6..c929e9261623d 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/illegal_input.cpp @@ -1,5 +1,5 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/trivial.cpp -o %t.out +// RUN: %clangxx -fsycl %{sycl_target_opts} %S/Inputs/trivial.cpp -o %t.out // RUN: not --crash env ONEAPI_DEVICE_SELECTOR="macaroni:*" %{run-unfiltered-devices} %t.out // RUN: not --crash env ONEAPI_DEVICE_SELECTOR=":" %{run-unfiltered-devices} %t.out // RUN: not --crash env ONEAPI_DEVICE_SELECTOR="level_zero:." 
%{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp index 0d80e37e7d9fc..4de91a66941aa 100644 --- a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp +++ b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp @@ -1,5 +1,5 @@ // REQUIRES: level_zero, level_zero_dev_kit, cm-compiler -// XFAIL: gpu && !(arch-intel_gpu_pvc && igc-dev) +// XFAIL: gpu // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16406 // RUN: %{build} -Wno-error=deprecated-declarations -DRUN_KERNELS %level_zero_options -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/OnlineCompiler/online_compiler_OpenCL.cpp b/sycl/test-e2e/OnlineCompiler/online_compiler_OpenCL.cpp index 360592289a969..b0023426f0631 100644 --- a/sycl/test-e2e/OnlineCompiler/online_compiler_OpenCL.cpp +++ b/sycl/test-e2e/OnlineCompiler/online_compiler_OpenCL.cpp @@ -1,5 +1,5 @@ // REQUIRES: opencl, opencl_icd, cm-compiler -// XFAIL: (gpu && !(arch-intel_gpu_pvc && igc-dev)) || cpu || accelerator +// XFAIL: gpu || cpu || accelerator // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16406 // RUN: %{build} -Wno-error=deprecated-declarations -DRUN_KERNELS %opencl_lib -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_amdgcn.cpp b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_amdgcn.cpp index e1616ec80dec0..c0a1cb07db1e1 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_amdgcn.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_amdgcn.cpp @@ -1,4 +1,4 @@ -// REQUIRES: hip_amd, opencl, gpu, cpu +// REQUIRES: hip, opencl, gpu, cpu // REQUIRES: build-and-run-mode // RUN: %clangxx -fsycl -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx906 -fsycl-targets=amdgcn-amd-amdhsa %S/Inputs/is_compatible_with_env.cpp -o %t.out diff --git 
a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp index 5a04ea4ed55df..ec713c0ab2718 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -DSOURCE1 -c -o %t1.o // RUN: %{build} -DSOURCE2 -c -o %t2.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t1.o %t2.o -Wno-unused-command-line-argument -o %t.exe +// RUN: %clangxx -fsycl %{sycl_target_opts} %t1.o %t2.o -Wno-unused-command-line-argument -o %t.exe // RUN: %{run} %t.exe #ifdef SOURCE1 diff --git a/sycl/test-e2e/Printf/char.cpp b/sycl/test-e2e/Printf/char.cpp index 550186eefea27..f409a5f8150d1 100644 --- a/sycl/test-e2e/Printf/char.cpp +++ b/sycl/test-e2e/Printf/char.cpp @@ -4,7 +4,7 @@ // The test is written using conversion specifiers table from cppreference [1] // [1]: https://en.cppreference.com/w/cpp/io/c/fprintf // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/Printf/double.cpp b/sycl/test-e2e/Printf/double.cpp index f7c0292fc7a21..ab756e6e83372 100644 --- a/sycl/test-e2e/Printf/double.cpp +++ b/sycl/test-e2e/Printf/double.cpp @@ -5,7 +5,7 @@ // [1]: https://en.cppreference.com/w/cpp/io/c/fprintf // // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/Printf/float.cpp b/sycl/test-e2e/Printf/float.cpp index 0643b06684860..070649f99d849 100644 --- a/sycl/test-e2e/Printf/float.cpp +++ b/sycl/test-e2e/Printf/float.cpp @@ -4,7 +4,7 @@ // The test is written using conversion specifiers table from cppreference [1] // [1]: https://en.cppreference.com/w/cpp/io/c/fprintf // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // // RUN: %{build} -o %t.out 
// RUN: %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/Printf/int.cpp b/sycl/test-e2e/Printf/int.cpp index 17b3e212c5988..d87d35bbf5186 100644 --- a/sycl/test-e2e/Printf/int.cpp +++ b/sycl/test-e2e/Printf/int.cpp @@ -4,7 +4,7 @@ // The test is written using conversion specifiers table from cppreference [1] // [1]: https://en.cppreference.com/w/cpp/io/c/fprintf // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // FIXME: The 'short' type gets overflown with sporadic values on CUDA. // XFAIL: cuda // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14734 diff --git a/sycl/test-e2e/Printf/mixed-address-space.cpp b/sycl/test-e2e/Printf/mixed-address-space.cpp index d79013007ca03..72d7c009569bd 100644 --- a/sycl/test-e2e/Printf/mixed-address-space.cpp +++ b/sycl/test-e2e/Printf/mixed-address-space.cpp @@ -1,7 +1,7 @@ // This test is written with an aim to check that experimental::printf versions // for constant and generic address space can be used in the same module. // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // XFAIL: cuda && windows // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14733 // FIXME: Drop the test once generic AS support is considered stable and the diff --git a/sycl/test-e2e/Printf/percent-symbol.cpp b/sycl/test-e2e/Printf/percent-symbol.cpp index f08cd3e085d0d..ea3e1ea40a925 100644 --- a/sycl/test-e2e/Printf/percent-symbol.cpp +++ b/sycl/test-e2e/Printf/percent-symbol.cpp @@ -4,7 +4,7 @@ // The test is written using conversion specifiers table from cppreference [1] // [1]: https://en.cppreference.com/w/cpp/io/c/fprintf // -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // XFAIL: cuda && windows // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14733 // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/Properties/cache_config.cpp b/sycl/test-e2e/Properties/cache_config.cpp index 0cda3e97a5d1f..666f85631c9b1 100644 --- a/sycl/test-e2e/Properties/cache_config.cpp +++ b/sycl/test-e2e/Properties/cache_config.cpp @@ -1,6 +1,9 @@ // 
REQUIRES: gpu, level_zero -// RUN: %{build} -o %t.out +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is +// resolved. +// RUN: %{build} -o %t.out -Wno-deprecated-declarations // RUN: env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s #include @@ -36,6 +39,14 @@ struct NegativeKernelFunctor { auto get(properties_tag) const { return properties{}; } }; +struct RangeKernelFunctor { + + RangeKernelFunctor() {} + + void operator()(id<2> i) const {} + auto get(properties_tag) const { return properties{cache_config(large_slm)}; } +}; + int main() { sycl::property_list q_prop{sycl::property::queue::in_order()}; queue q{q_prop}; @@ -43,22 +54,10 @@ int main() { sycl::ext::oneapi::experimental::properties properties{ cache_config(large_slm)}; - // CHECK: single_task - // CHECK: ZE ---> zeKernelSetCacheConfig - std::cout << "single_task" << std::endl; - q.single_task(properties, [=]() {}).wait(); - // CHECK: parallel_for with sycl::range // CHECK: ZE ---> zeKernelSetCacheConfig std::cout << "parallel_for with sycl::range" << std::endl; - q.parallel_for(range<2>{16, 16}, properties, [=](id<2> i) {}).wait(); - - // CHECK: parallel_for with sycl::nd_range - // CHECK: ZE ---> zeKernelSetCacheConfig - std::cout << "parallel_for with sycl::nd_range" << std::endl; - q.parallel_for(nd_range<2>{range<2>(4, 4), range<2>(2, 2)}, properties, - [=](nd_item<2> i) {}) - .wait(); + q.parallel_for(range<2>{16, 16}, RangeKernelFunctor{}).wait(); // CHECK: parallel_for_work_group(range, func) // CHECK: ZE ---> zeKernelSetCacheConfig diff --git a/sycl/test-e2e/README.md b/sycl/test-e2e/README.md index 396aa9ef7341a..5d19795212305 100644 --- a/sycl/test-e2e/README.md +++ b/sycl/test-e2e/README.md @@ -67,7 +67,7 @@ is substituted with just `[Optional run_launcher if that is configured]`. Another little nuance is `%{sycl_triple}` substitution. 
It is constructed by concatenating triples for all the devices from `sycl_devices` supported by a given test. After that there is also a convenient `%{build}` substitution that -is equivalent to `%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s`. +is equivalent to `%clangxx -fsycl %{sycl_target_opts} %s`. ## Prerequisites @@ -184,12 +184,6 @@ at the full path specified by this variable. ***CUDA_LIBS_DIR*** - path to CUDA libraries. -***HIP_PLATFORM*** - platform selection for HIP targeted devices. -Defaults to AMD if no value is given. Supported values are: - -* **AMD** - for HIP to target AMD GPUs -* **NVIDIA** - for HIP to target NVIDIA GPUs - ***AMD_ARCH*** - flag may be set for when using HIP AMD triple. For example it may be set to "gfx906". Otherwise must be provided via the ***amd_arch*** LIT parameter (e.g., ***--param amd_arch=gfx906***) at runtime via the command line diff --git a/sycl/test-e2e/Regression/DAE-separate-compile.cpp b/sycl/test-e2e/Regression/DAE-separate-compile.cpp index 64e19ec8f90ad..d787c288d64df 100644 --- a/sycl/test-e2e/Regression/DAE-separate-compile.cpp +++ b/sycl/test-e2e/Regression/DAE-separate-compile.cpp @@ -5,12 +5,11 @@ // The test checks that the scenario works correctly. 
// // RUN: %{build} -O2 -c -o %t.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.o %O0 -Wno-unused-command-line-argument -o %t.out +// RUN: %clangxx -fsycl %{sycl_target_opts} %t.o %O0 -Wno-unused-command-line-argument -o %t.out // RUN: %{run} %t.out // Failing on HIP AMD, enable after fixed -// UNSUPPORTED: hip_amd - +// UNSUPPORTED: hip #include #include diff --git a/sycl/test-e2e/Regression/commandlist/gpu.cpp b/sycl/test-e2e/Regression/commandlist/gpu.cpp index 552f0f1109b1e..02db522a23943 100644 --- a/sycl/test-e2e/Regression/commandlist/gpu.cpp +++ b/sycl/test-e2e/Regression/commandlist/gpu.cpp @@ -1,4 +1,4 @@ // REQUIRES: gpu -// RUN: %clangxx -Wno-error=vla-cxx-extension -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/FindPrimesSYCL.cpp %S/Inputs/main.cpp -o %t.out %threads_lib +// RUN: %clangxx -Wno-error=vla-cxx-extension -fsycl %{sycl_target_opts} %S/Inputs/FindPrimesSYCL.cpp %S/Inputs/main.cpp -o %t.out %threads_lib // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Regression/local-arg-align.cpp b/sycl/test-e2e/Regression/local-arg-align.cpp index 4eca3aeff7f84..3959570a3e44f 100644 --- a/sycl/test-e2e/Regression/local-arg-align.cpp +++ b/sycl/test-e2e/Regression/local-arg-align.cpp @@ -2,7 +2,7 @@ // // RUN: %{run} %t.out -// UNSUPPORTED: true +// UNSUPPORTED: system-windows // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/10682 //==-- local-arg-align.cpp - Test for local argument alignmnent ------------==// diff --git a/sycl/test-e2e/Regression/multiple-targets.cpp b/sycl/test-e2e/Regression/multiple-targets.cpp index a2498c3301b99..aa8c125d90738 100644 --- a/sycl/test-e2e/Regression/multiple-targets.cpp +++ b/sycl/test-e2e/Regression/multiple-targets.cpp @@ -4,16 +4,16 @@ // // REQUIRES: cuda || hip || native_cpu // REQUIRES: build-and-run-mode -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -o %t1.out %s +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 %if any-device-is-hip %{ %{hip_arch_opts} %} -o 
%t1.out %s // RUN: %{run} %t1.out // -// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -o %t2.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} %if any-device-is-hip %{ %{hip_arch_opts} %} -o %t2.out %s // RUN: %{run} %t2.out // -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -fsycl-device-code-split=per_kernel -o %t3.out %s +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 %if any-device-is-hip %{ %{hip_arch_opts} %} -fsycl-device-code-split=per_kernel -o %t3.out %s // RUN: %{run} %t3.out // -// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t4.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} %if any-device-is-hip %{ %{hip_arch_opts} %} -fsycl-device-code-split=per_kernel -o %t4.out %s // RUN: %{run} %t4.out #include diff --git a/sycl/test-e2e/Regression/multithread_write_accessor.cpp b/sycl/test-e2e/Regression/multithread_write_accessor.cpp index 87299ed3e4d5c..b1d927517079c 100644 --- a/sycl/test-e2e/Regression/multithread_write_accessor.cpp +++ b/sycl/test-e2e/Regression/multithread_write_accessor.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out %threads_lib // RUN: %{run} %t.out -// XFAIL: arch-intel_gpu_pvc +// XFAIL: arch-intel_gpu_pvc && opencl // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16401 #include diff --git a/sycl/test-e2e/Regression/static-buffer-dtor.cpp b/sycl/test-e2e/Regression/static-buffer-dtor.cpp index 8ff9328d6535d..e84d3a062978b 100644 --- a/sycl/test-e2e/Regression/static-buffer-dtor.cpp +++ b/sycl/test-e2e/Regression/static-buffer-dtor.cpp @@ -13,7 +13,7 @@ // RUN: %{run} %t.out // Failing on HIP AMD -// UNSUPPORTED: hip_amd +// UNSUPPORTED: hip // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows diff --git a/sycl/test-e2e/Sampler/normalized-clampedge-nearest.cpp b/sycl/test-e2e/Sampler/normalized-clampedge-nearest.cpp index 6f349254dda55..b1f13e0b63140 100644 --- a/sycl/test-e2e/Sampler/normalized-clampedge-nearest.cpp +++ b/sycl/test-e2e/Sampler/normalized-clampedge-nearest.cpp @@ -4,7 +4,7 @@ // // Missing __spirv_ImageWrite, __spirv_SampledImage, // __spirv_ImageSampleExplicitLod on AMD -// XFAIL: hip_amd +// XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14732 /* diff --git a/sycl/test-e2e/SeparateCompile/same-kernel.cpp b/sycl/test-e2e/SeparateCompile/same-kernel.cpp index 8bdadbe7ad62b..27b701a0b1550 100644 --- a/sycl/test-e2e/SeparateCompile/same-kernel.cpp +++ b/sycl/test-e2e/SeparateCompile/same-kernel.cpp @@ -12,7 +12,7 @@ // RUN: %{build} -DB_CPP=1 -c -o %t-same-kernel-b.o // // >> ---- link the full hetero app -// RUN: %clangxx %t-same-kernel-a.o %t-same-kernel-b.o -Wno-unused-command-line-argument -o %t-same-kernel.exe -fsycl -fsycl-targets=%{sycl_triple} +// RUN: %clangxx -fsycl %{sycl_target_opts} %t-same-kernel-a.o %t-same-kernel-b.o -Wno-unused-command-line-argument -o %t-same-kernel.exe // RUN: %{run} %t-same-kernel.exe #include diff --git a/sycl/test-e2e/SeparateCompile/sycl-external-within-staticlib.cpp b/sycl/test-e2e/SeparateCompile/sycl-external-within-staticlib.cpp index 219634f47646c..d62becf4d5567 100644 --- a/sycl/test-e2e/SeparateCompile/sycl-external-within-staticlib.cpp +++ b/sycl/test-e2e/SeparateCompile/sycl-external-within-staticlib.cpp @@ -5,7 +5,7 @@ // RUN: %{build} -O3 -DSOURCE3 -c -o %t3.o // RUN: rm -f %t.a // RUN: llvm-ar crv %t.a %t1.o %t2.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -O3 %t3.o %t.a -Wno-unused-command-line-argument -o %t1.exe +// RUN: %clangxx -fsycl %{sycl_target_opts} -O3 %t3.o %t.a -Wno-unused-command-line-argument -o %t1.exe // RUN: %{run} %t1.exe // Check the repacked case as it can behave differently. 
@@ -13,7 +13,7 @@ // RUN: echo addlib %t.a >> %t.txt // RUN: echo save >> %t.txt // RUN: cat %t.txt | llvm-ar -M -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -O3 %t3.o %t_repacked.a -Wno-unused-command-line-argument -o %t2.exe +// RUN: %clangxx -fsycl %{sycl_target_opts} -O3 %t3.o %t_repacked.a -Wno-unused-command-line-argument -o %t2.exe // RUN: %{run} %t2.exe #include diff --git a/sycl/test-e2e/SeparateCompile/sycl-external.cpp b/sycl/test-e2e/SeparateCompile/sycl-external.cpp index 37facb7ecfc57..85e2c97d6512a 100644 --- a/sycl/test-e2e/SeparateCompile/sycl-external.cpp +++ b/sycl/test-e2e/SeparateCompile/sycl-external.cpp @@ -2,14 +2,14 @@ // different object file. // RUN: %{build} -DSOURCE1 -c -o %t1.o // RUN: %{build} -DSOURCE2 -c -o %t2.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t1.o %t2.o -Wno-unused-command-line-argument -o %t1.exe +// RUN: %clangxx -fsycl %{sycl_target_opts} %t1.o %t2.o -Wno-unused-command-line-argument -o %t1.exe // RUN: %{run} %t1.exe // // Test2 - check that kernel can call a SYCL_EXTERNAL function defined in a // static library. 
// RUN: rm -f %t.a // RUN: llvm-ar crv %t.a %t1.o -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t2.o %t.a -Wno-unused-command-line-argument -o %t2.exe +// RUN: %clangxx -fsycl %{sycl_target_opts} %t2.o %t.a -Wno-unused-command-line-argument -o %t2.exe // RUN: %{run} %t2.exe #include diff --git a/sycl/test-e2e/Tracing/usm/queue_copy_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_copy_released_pointer.cpp index a37e5b8f4238c..86992e63e57fb 100644 --- a/sycl/test-e2e/Tracing/usm/queue_copy_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_copy_released_pointer.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: windows || hip_amd +// UNSUPPORTED: windows || hip // RUN: %{build} -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp index 4c4299dd93d8e..d7407bbeeab97 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: windows || hip_amd +// UNSUPPORTED: windows || hip // RUN: %{build} -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp index 61e27b7927f7b..4444ee1b7b903 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: windows || hip_amd +// UNSUPPORTED: windows || hip // RUN: %{build} -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s diff --git a/sycl/test-e2e/USM/memadvise_flags.cpp b/sycl/test-e2e/USM/memadvise_flags.cpp index 7de2a8a931f30..df2a1b31532ad 
100644 --- a/sycl/test-e2e/USM/memadvise_flags.cpp +++ b/sycl/test-e2e/USM/memadvise_flags.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t1.out -// REQUIRES: cuda || hip_amd +// REQUIRES: cuda || hip // RUN: %{run} %t1.out //==---------------- memadvise_flags.cpp -----------------------------------==// diff --git a/sycl/test-e2e/USM/memops2d/copy2d_dhost_to_shared.cpp b/sycl/test-e2e/USM/memops2d/copy2d_dhost_to_shared.cpp index 317447d645b67..90eaa14189ae1 100644 --- a/sycl/test-e2e/USM/memops2d/copy2d_dhost_to_shared.cpp +++ b/sycl/test-e2e/USM/memops2d/copy2d_dhost_to_shared.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. // UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "copy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/copy2d_host_to_shared.cpp b/sycl/test-e2e/USM/memops2d/copy2d_host_to_shared.cpp index 48bf7fe13abb6..fa39cfdcaa6f4 100644 --- a/sycl/test-e2e/USM/memops2d/copy2d_host_to_shared.cpp +++ b/sycl/test-e2e/USM/memops2d/copy2d_host_to_shared.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. // UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "copy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/copy2d_shared_to_dhost.cpp b/sycl/test-e2e/USM/memops2d/copy2d_shared_to_dhost.cpp index ab766f59c3d10..85498b3a9f993 100644 --- a/sycl/test-e2e/USM/memops2d/copy2d_shared_to_dhost.cpp +++ b/sycl/test-e2e/USM/memops2d/copy2d_shared_to_dhost.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. 
// UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "copy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/copy2d_shared_to_host.cpp b/sycl/test-e2e/USM/memops2d/copy2d_shared_to_host.cpp index de99f08d24096..26497825c4f51 100644 --- a/sycl/test-e2e/USM/memops2d/copy2d_shared_to_host.cpp +++ b/sycl/test-e2e/USM/memops2d/copy2d_shared_to_host.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. // UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "copy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/memcpy2d_dhost_to_shared.cpp b/sycl/test-e2e/USM/memops2d/memcpy2d_dhost_to_shared.cpp index f01317710c35d..ca57afcc1e206 100644 --- a/sycl/test-e2e/USM/memops2d/memcpy2d_dhost_to_shared.cpp +++ b/sycl/test-e2e/USM/memops2d/memcpy2d_dhost_to_shared.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. // UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "memcpy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/memcpy2d_host_to_shared.cpp b/sycl/test-e2e/USM/memops2d/memcpy2d_host_to_shared.cpp index 0418678424dfc..51916927f3bed 100644 --- a/sycl/test-e2e/USM/memops2d/memcpy2d_host_to_shared.cpp +++ b/sycl/test-e2e/USM/memops2d/memcpy2d_host_to_shared.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. 
// UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "memcpy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_dhost.cpp b/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_dhost.cpp index 7e2dcb8a9bd4d..279d62c51cb87 100644 --- a/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_dhost.cpp +++ b/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_dhost.cpp @@ -10,7 +10,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 // Temporarily disabled until the failure is addressed. diff --git a/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_host.cpp b/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_host.cpp index 645adac407f90..f30e80c39b4dd 100644 --- a/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_host.cpp +++ b/sycl/test-e2e/USM/memops2d/memcpy2d_shared_to_host.cpp @@ -13,7 +13,7 @@ // Temporarily disabled until the failure is addressed. 
// UNSUPPORTED: (level_zero && windows) -// UNSUPPORTED: (gpu-intel-dg2 || hip_amd) && linux +// UNSUPPORTED: (gpu-intel-dg2 || hip) && linux // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15648 #include "memcpy2d_common.hpp" diff --git a/sycl/test-e2e/USM/memory_coherency_hip.cpp b/sycl/test-e2e/USM/memory_coherency_hip.cpp index a6cca6620deb7..e060b018025d7 100644 --- a/sycl/test-e2e/USM/memory_coherency_hip.cpp +++ b/sycl/test-e2e/USM/memory_coherency_hip.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t1.out -// REQUIRES: hip_amd +// REQUIRES: hip // RUN: %{run} %t1.out //==---- memory_coherency_hip.cpp -----------------------------------------==// diff --git a/sycl/test-e2e/VirtualFunctions/2/1/1/missing-overrides.cpp b/sycl/test-e2e/VirtualFunctions/2/1/1/missing-overrides.cpp index f198bc94f855f..b008effc626c0 100644 --- a/sycl/test-e2e/VirtualFunctions/2/1/1/missing-overrides.cpp +++ b/sycl/test-e2e/VirtualFunctions/2/1/1/missing-overrides.cpp @@ -66,6 +66,25 @@ void applyOp(int *DataPtr, Base *ObjPtr) { ObjPtr->multiply(DataPtr); } +template struct KernelFunctor { + T1 mStorageAcc; + T2 mDataAcc; + unsigned mTestCase; + KernelFunctor(T1 &StorageAcc, T2 &DataAcc, unsigned TestCase) + : mStorageAcc(StorageAcc), mDataAcc(DataAcc), mTestCase(TestCase) {} + + void operator()() const { + auto *Ptr = + mStorageAcc[0].template construct(mTestCase); + applyOp( + mDataAcc.template get_multi_ptr().get(), + Ptr); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t(TestCase); - applyOp(DataAcc.get_multi_ptr().get(), - Ptr); - }); + CGH.single_task(KernelFunctor(StorageAcc, DataAcc, TestCase)); }); Base *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/2/1/1/more-complex-hierarchy.cpp b/sycl/test-e2e/VirtualFunctions/2/1/1/more-complex-hierarchy.cpp index bb334972c3f77..d1c2c5fd092a9 100644 --- 
a/sycl/test-e2e/VirtualFunctions/2/1/1/more-complex-hierarchy.cpp +++ b/sycl/test-e2e/VirtualFunctions/2/1/1/more-complex-hierarchy.cpp @@ -45,6 +45,25 @@ class IncrementBy8 : public IncrementOp { void applyOp(int *Data, AbstractOp *Obj) { Obj->applyOp(Data); } +template struct KernelFunctor { + T1 mStorageAcc; + T2 mDataAcc; + unsigned mTestCase; + KernelFunctor(T1 &StorageAcc, T2 &DataAcc, unsigned TestCase) + : mStorageAcc(StorageAcc), mDataAcc(DataAcc), mTestCase(TestCase) {} + + void operator()() const { + auto *Ptr = mStorageAcc[0].template construct( + mTestCase); + applyOp( + mDataAcc.template get_multi_ptr().get(), + Ptr); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -59,7 +78,6 @@ int main() try { sycl::queue q(asyncHandler); - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (unsigned TestCase = 0; TestCase < 4; ++TestCase) { int HostData = 42; int Data = HostData; @@ -68,12 +86,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor StorageAcc(DeviceStorage, CGH, sycl::write_only); sycl::accessor DataAcc(DataStorage, CGH, sycl::write_only); - CGH.single_task(props, [=]() { - auto *Ptr = - StorageAcc[0].construct(TestCase); - applyOp(DataAcc.get_multi_ptr().get(), - Ptr); - }); + CGH.single_task(KernelFunctor(StorageAcc, DataAcc, TestCase)); }); auto *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/2/1/1/simple-hierarchy.cpp b/sycl/test-e2e/VirtualFunctions/2/1/1/simple-hierarchy.cpp index 2bfb3dd0f010d..aad1e1ccecffa 100644 --- a/sycl/test-e2e/VirtualFunctions/2/1/1/simple-hierarchy.cpp +++ b/sycl/test-e2e/VirtualFunctions/2/1/1/simple-hierarchy.cpp @@ -30,6 +30,24 @@ class IncrementBy8 : public BaseIncrement { void increment(int *Data) override { *Data += 8; } }; +template struct KernelFunctor { + T1 mStorageAcc; + T2 mDataAcc; + unsigned mTestCase; + 
KernelFunctor(T1 &StorageAcc, T2 &DataAcc, unsigned TestCase) + : mStorageAcc(StorageAcc), mDataAcc(DataAcc), mTestCase(TestCase) {} + void operator()() const { + auto *Ptr = + mStorageAcc[0].template construct( + mTestCase); + Ptr->increment( + mDataAcc.template get_multi_ptr().get()); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -44,7 +62,6 @@ int main() try { sycl::queue q(asyncHandler); - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (unsigned TestCase = 0; TestCase < 4; ++TestCase) { int HostData = 42; int Data = HostData; @@ -53,12 +70,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor StorageAcc(DeviceStorage, CGH, sycl::write_only); sycl::accessor DataAcc(DataStorage, CGH, sycl::write_only); - CGH.single_task(props, [=]() { - auto *Ptr = - StorageAcc[0].construct(TestCase); - Ptr->increment( - DataAcc.get_multi_ptr().get()); - }); + CGH.single_task(KernelFunctor(StorageAcc, DataAcc, TestCase)); }); auto *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/2/2/single-construct-single-use.cpp b/sycl/test-e2e/VirtualFunctions/2/2/single-construct-single-use.cpp index ccf0c77036085..467d4e5b006c1 100644 --- a/sycl/test-e2e/VirtualFunctions/2/2/single-construct-single-use.cpp +++ b/sycl/test-e2e/VirtualFunctions/2/2/single-construct-single-use.cpp @@ -57,6 +57,22 @@ class IncrementBy16 : public BaseIncrement { void increment(int *Data) override { *Data += 16 + Mod; } }; +template struct KernelFunctor { + T1 mStorageAcc; + T2 mDataAcc; + KernelFunctor(T1 &StorageAcc, T2 &DataAcc) + : mStorageAcc(StorageAcc), mDataAcc(DataAcc) {} + void operator()() const { + auto *Ptr = mStorageAcc[0].template getAs(); + Ptr->increment( + mDataAcc.template get_multi_ptr().get()); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{ + 
oneapi::assume_indirect_calls_to}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -72,8 +88,6 @@ int main() try { sycl::queue q(asyncHandler); // TODO: cover uses case when objects are passed through USM - constexpr oneapi::properties props{ - oneapi::assume_indirect_calls_to}; for (unsigned TestCase = 0; TestCase < 5; ++TestCase) { int HostData = 42; int Data = HostData; @@ -90,11 +104,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor StorageAcc(DeviceStorage, CGH, sycl::read_write); sycl::accessor DataAcc(DataStorage, CGH, sycl::write_only); - CGH.single_task(props, [=]() { - auto *Ptr = StorageAcc[0].getAs(); - Ptr->increment( - DataAcc.get_multi_ptr().get()); - }); + CGH.single_task(KernelFunctor(StorageAcc, DataAcc)); }); auto *Ptr = diff --git a/sycl/test-e2e/VirtualFunctions/misc/math.cpp b/sycl/test-e2e/VirtualFunctions/misc/math.cpp index 71b34c23cef1f..da0570ce0291e 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/math.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/math.cpp @@ -40,6 +40,21 @@ class RoundOp : public BaseOp { virtual float apply(float V) { return sycl::round(V); } }; +template struct KernelFunctor { + T1 mDataAcc; + T2 mDeviceStorage; + KernelFunctor(T1 &DataAcc, T2 &DeviceStorage) + : mDataAcc(DataAcc), mDeviceStorage(DeviceStorage) {} + + void operator()() const { + auto *Ptr = mDeviceStorage->template getAs(); + mDataAcc[0] = Ptr->apply(mDataAcc[0]); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -49,7 +64,6 @@ int main() try { auto *DeviceStorage = sycl::malloc_shared(1, q); - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (unsigned TestCase = 0; TestCase < 3; ++TestCase) { float HostData = 3.56; float Data = HostData; @@ -63,10 +77,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write); - 
CGH.single_task(props, [=]() { - auto *Ptr = DeviceStorage->getAs(); - DataAcc[0] = Ptr->apply(DataAcc[0]); - }); + CGH.single_task(KernelFunctor(DataAcc, DeviceStorage)); }); auto *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index a69810145507d..29e89e759bb96 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -13,14 +13,11 @@ import re -def get_triple(test, backend): +def get_triple(backend): if backend == "cuda": return "nvptx64-nvidia-cuda" if backend == "hip": - if test.config.hip_platform == "NVIDIA": - return "nvptx64-nvidia-cuda" - else: - return "amdgcn-amd-amdhsa" + return "amdgcn-amd-amdhsa" if backend == "native_cpu": return "native_cpu" return "spir64" @@ -171,17 +168,27 @@ def execute(self, test, litConfig): for sycl_device in devices_for_test: (backend, _) = sycl_device.split(":") - triples.add(get_triple(test, backend)) + triples.add(get_triple(backend)) substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase) + substitutions.append(("%{sycl_triple}", format(",".join(triples)))) - # -fsycl-targets is needed for CUDA/HIP, so just use it be default so - # -that new tests by default would runnable there (unless they have - # -other restrictions). 
+ + sycl_target_opts = "-fsycl-targets=%{sycl_triple}" + if get_triple("hip") in triples: + hip_arch_opts = ( + " -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch={}".format( + test.config.amd_arch + ) + ) + sycl_target_opts += hip_arch_opts + substitutions.append(("%{hip_arch_opts}", hip_arch_opts)) + substitutions.append(("%{sycl_target_opts}", sycl_target_opts)) + substitutions.append( ( "%{build}", - "%clangxx -fsycl -fsycl-targets=%{sycl_triple} %verbose_print %s", + "%clangxx -fsycl %{sycl_target_opts} %verbose_print %s", ) ) if platform.system() == "Windows": diff --git a/sycl/test-e2e/forward_progress/forward_progress_kernel_param_L0_gpu.cpp b/sycl/test-e2e/forward_progress/forward_progress_kernel_param_L0_gpu.cpp index 003840a8c1299..b8b80b9541569 100644 --- a/sycl/test-e2e/forward_progress/forward_progress_kernel_param_L0_gpu.cpp +++ b/sycl/test-e2e/forward_progress/forward_progress_kernel_param_L0_gpu.cpp @@ -23,41 +23,42 @@ void check_props(sycl::queue &q) {} // Full specializations for each progress guarantee +template struct KernelFunctor { + T props; + KernelFunctor(const T &props_) : props(props_) {} + void operator()() const {} + auto get(properties_tag) const { return props; } +}; + template <> void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::parallel; // Check properties at execution_scope::root_group coordination level - q.single_task( - properties{work_group_progress}, - [=]() {}); - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_group_progress})); + q.single_task(KernelFunctor( + properties{sub_group_progress})); try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::work_group coordination level - q.single_task( - 
properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{sub_group_progress})); try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::sub_group coordination level try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); } catch (sycl::exception &ex) { } } @@ -66,66 +67,54 @@ template <> void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::weakly_parallel; // Check properties at execution_scope::root_group coordination level - q.single_task( - properties{work_group_progress}, - [=]() {}); - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_group_progress})); + q.single_task(KernelFunctor( + properties{sub_group_progress})); - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); // Check properties at execution_scope::work_group coordination level - q.single_task( - properties{sub_group_progress}, - [=]() {}); - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{sub_group_progress})); + q.single_task(KernelFunctor( + properties{work_item_progress})); // Check properties at execution_scope::sub_group coordination level - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); } template <> void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::concurrent; // Check properties at execution_scope::root_group coordination level - q.single_task( - properties{work_group_progress}, - [=]() {}); - q.single_task( - 
properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_group_progress})); + q.single_task(KernelFunctor( + properties{sub_group_progress})); try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::work_group coordination level - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{sub_group_progress})); try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::sub_group coordination level try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } diff --git a/sycl/test-e2e/forward_progress/forward_progress_kernel_param_ocl_cpu.cpp b/sycl/test-e2e/forward_progress/forward_progress_kernel_param_ocl_cpu.cpp index ffdd99184d233..8647d42ee24c3 100644 --- a/sycl/test-e2e/forward_progress/forward_progress_kernel_param_ocl_cpu.cpp +++ b/sycl/test-e2e/forward_progress/forward_progress_kernel_param_ocl_cpu.cpp @@ -22,49 +22,50 @@ void check_props(sycl::queue &q) {} // Full specializations for each progress guarantee +template struct KernelFunctor { + T props; + KernelFunctor(const T &props_) : props(props_) {} + void operator()() const {} + auto get(properties_tag) const { return props; } +}; + template <> void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::parallel; // Check properties at execution_scope::root_group coordination level - q.single_task( - properties{work_group_progress}, - 
[=]() {}); + q.single_task(KernelFunctor( + properties{work_group_progress})); try { - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + sub_group_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::work_group coordination level try { - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + sub_group_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::sub_group coordination level try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } @@ -74,29 +75,23 @@ template <> void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::weakly_parallel; // Check properties at execution_scope::root_group coordination level - q.single_task( - properties{work_group_progress}, - [=]() {}); - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_group_progress})); + q.single_task(KernelFunctor( + properties{sub_group_progress})); - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); // Check properties at execution_scope::work_group coordination level - 
q.single_task( - properties{sub_group_progress}, - [=]() {}); - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{sub_group_progress})); + q.single_task(KernelFunctor( + properties{work_item_progress})); // Check properties at execution_scope::sub_group coordination level - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor( + properties{work_item_progress})); } template <> @@ -104,48 +99,42 @@ void check_props(sycl::queue &q) { constexpr auto guarantee = forward_progress_guarantee::concurrent; // Check properties at execution_scope::root_group coordination level try { - q.single_task( - properties{work_group_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_group_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } try { - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + sub_group_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::work_group coordination level try { - q.single_task( - properties{sub_group_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + sub_group_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + q.single_task(KernelFunctor(properties{ + work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } // Check properties at execution_scope::sub_group coordination level try { - q.single_task( - properties{work_item_progress}, - [=]() {}); + 
q.single_task(KernelFunctor( + properties{work_item_progress})); assert(false && "Expected exception not seen!"); } catch (sycl::exception &ex) { } diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index a374cfaee402f..9ffb648c6bc7c 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -524,18 +524,6 @@ def open_check_file(file_name): if be not in available_devices or dev not in available_devices[be]: lit_config.error("Unsupported device {}".format(d)) -# If HIP_PLATFORM flag is not set, default to AMD, and check if HIP platform is supported -supported_hip_platforms = ["AMD", "NVIDIA"] -if config.hip_platform == "": - config.hip_platform = "AMD" -if config.hip_platform not in supported_hip_platforms: - lit_config.error( - "Unknown HIP platform '" - + config.hip_platform - + "' supported platforms are " - + ", ".join(supported_hip_platforms) - ) - if "cuda:gpu" in config.sycl_devices: if "CUDA_PATH" not in os.environ: if platform.system() == "Windows": @@ -697,8 +685,6 @@ def open_check_file(file_name): # discovered already. config.sycl_dev_features = {} -# Architecture flag for compiling for AMD HIP devices. Empty otherwise. -arch_flag = "" # Version of the driver for a given device. Empty for non-Intel devices. config.intel_driver_ver = {} for sycl_device in config.sycl_devices: @@ -839,7 +825,7 @@ def open_check_file(file_name): # Use short names for LIT rules. 
features.add(be) - if be == "hip" and config.hip_platform == "AMD": + if be == "hip": if not config.amd_arch: # Guaranteed to be a single element in the set arch = [x for x in architecture_feature][0] @@ -850,15 +836,9 @@ def open_check_file(file_name): ) config.amd_arch = arch.replace(amd_arch_prefix, "") llvm_config.with_system_environment("ROCM_PATH") - config.available_features.add("hip_amd") - arch_flag = ( - "-Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=" + config.amd_arch - ) config.substitutions.append( ("%rocm_path", os.environ.get("ROCM_PATH", "/opt/rocm")) ) - elif be == "hip" and config.hip_platform == "NVIDIA": - config.available_features.add("hip_nvidia") config.sycl_dev_features[sycl_device] = features.union(config.available_features) if is_intel_driver: @@ -871,10 +851,7 @@ def open_check_file(file_name): config.substitutions.append(("%clang", " true ")) else: config.substitutions.append( - ( - "%clangxx", - " " + config.dpcpp_compiler + " " + config.cxx_flags + " " + arch_flag, - ) + ("%clangxx", " " + config.dpcpp_compiler + " " + config.cxx_flags) ) config.substitutions.append( ("%clang", " " + config.dpcpp_compiler + " " + config.c_flags) diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index a6b86cb73d505..00928dd9141fc 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -30,7 +30,6 @@ config.igc_tag_file = os.path.join("/usr/local/lib/igc/", 'IGCTAG.txt') config.sycl_devices = lit_config.params.get("sycl_devices", "@SYCL_TEST_E2E_TARGETS@").split(';') -config.hip_platform = "@HIP_PLATFORM@" config.amd_arch = lit_config.params.get("amd_arch", "@AMD_ARCH@") config.sycl_threads_lib = '@SYCL_THREADS_LIB@' config.extra_environment = lit_config.params.get("extra_environment", "@LIT_EXTRA_ENVIRONMENT@") diff --git a/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp b/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp index eca55f738d83a..d93a7880d404e 100644 --- 
a/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp +++ b/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp @@ -2,6 +2,6 @@ // TODO: Supported for ROCM 5. Further development required to support AMDGPU. // UNSUPPORTED: hip -// RUN: %clangxx -fPIC -shared -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/kernel_module.cpp -o %t.so -// RUN: %clangxx -DTEST_SHARED_LIB='"%t.so"' -ldl -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/kernel_function.cpp -o %t.out +// RUN: %clangxx -fPIC -shared -fsycl %{sycl_target_opts} %S/Inputs/kernel_module.cpp -o %t.so +// RUN: %clangxx -DTEST_SHARED_LIB='"%t.so"' -ldl -fsycl %{sycl_target_opts} %S/Inputs/kernel_function.cpp -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/syclcompat/kernel/kernel_win.cpp b/sycl/test-e2e/syclcompat/kernel/kernel_win.cpp index 02ec26ab78a48..85ecf5687ca63 100644 --- a/sycl/test-e2e/syclcompat/kernel/kernel_win.cpp +++ b/sycl/test-e2e/syclcompat/kernel/kernel_win.cpp @@ -3,6 +3,6 @@ // DEFINE: %{sharedflag} = %if cl_options %{/clang:-shared%} %else %{-shared%} -// RUN: %clangxx %{sharedflag} -fsycl -fsycl-targets=%{sycl_triple} %S\Inputs\kernel_module.cpp -o %t.dll -// RUN: %clangxx -DTEST_SHARED_LIB='"%/t.dll"' -fsycl -fsycl-targets=%{sycl_triple} %S\Inputs\kernel_function.cpp -o %t.out +// RUN: %clangxx %{sharedflag} -fsycl %{sycl_target_opts} %S\Inputs\kernel_module.cpp -o %t.dll +// RUN: %clangxx -DTEST_SHARED_LIB='"%/t.dll"' -fsycl %{sycl_target_opts} %S\Inputs\kernel_function.cpp -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/syclcompat/launch/launch_policy_lmem.cpp b/sycl/test-e2e/syclcompat/launch/launch_policy_lmem.cpp index 033f5c99d74e1..fa253b8f1666f 100644 --- a/sycl/test-e2e/syclcompat/launch/launch_policy_lmem.cpp +++ b/sycl/test-e2e/syclcompat/launch/launch_policy_lmem.cpp @@ -23,12 +23,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out -// UNSUPPORTED: linux && opencl && (gpu-intel-gen12 || gpu-intel-dg2) +// UNSUPPORTED: linux && 
opencl && (gpu-intel-gen12 || gpu-intel-dg2 || arch-intel_gpu_pvc) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15275 -// XFAIL: arch-intel_gpu_pvc -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16401 - #include #include #include diff --git a/sycl/test-e2e/syclcompat/math/math_emu_simd_from_syclomatic.cpp b/sycl/test-e2e/syclcompat/math/math_emu_simd_from_syclomatic.cpp index b9b274aa2442b..6a850a887eb18 100644 --- a/sycl/test-e2e/syclcompat/math/math_emu_simd_from_syclomatic.cpp +++ b/sycl/test-e2e/syclcompat/math/math_emu_simd_from_syclomatic.cpp @@ -29,7 +29,7 @@ void checkResult(const string &FuncName, const vector &Inputs, for (size_t i = 1; i < Inputs.size(); ++i) { cout << ", " << Inputs[i]; } - cout << ") = " << DeviceResult << " (expect " << Expect << ")"; + cout << ") = " << DeviceResult << " (expect " << Expect << ")" << endl; assert(DeviceResult == Expect); } @@ -43,19 +43,24 @@ void testVabs2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabs2(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vabs2", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabs2(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabs2", {TestCase.first}, TestCase.second, *DeviceResult); + } + } catch (...) 
{ + // Intentionally left empty to make sure allocated memory can be freed. } + sycl::free(DeviceResult, q_ct1); } void vabs4(unsigned int *const DeviceResult, unsigned int Input1) { @@ -68,19 +73,24 @@ void testVabs4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabs4(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vabs4", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabs4(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabs4", {TestCase.first}, TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsdiffs2(unsigned int *const DeviceResult, unsigned int Input1, @@ -95,22 +105,27 @@ void testVabsdiffs2Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsdiffs2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vabsdiffs2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsdiffs2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsdiffs2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsdiffs4(unsigned int *const DeviceResult, unsigned int Input1, @@ -125,22 +140,27 @@ void testVabsdiffs4Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsdiffs4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vabsdiffs4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsdiffs4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsdiffs4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsdiffu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -155,22 +175,27 @@ void testVabsdiffu2Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsdiffu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vabsdiffu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsdiffu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsdiffu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsdiffu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -185,22 +210,27 @@ void testVabsdiffu4Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsdiffu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vabsdiffu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsdiffu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsdiffu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsss2(unsigned int *const DeviceResult, unsigned int Input1) { @@ -214,19 +244,25 @@ void testVabsss2Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsss2(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vabsss2", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsss2(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsss2", {TestCase.first}, TestCase.second, + *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vabsss4(unsigned int *const DeviceResult, unsigned int Input1) { @@ -240,19 +276,25 @@ void testVabsss4Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vabsss4(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vabsss4", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vabsss4(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vabsss4", {TestCase.first}, TestCase.second, + *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vadd2(unsigned int *const DeviceResult, unsigned int Input1, @@ -266,22 +308,27 @@ void testVadd2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vadd2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vadd2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vadd2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vadd2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vadd4(unsigned int *const DeviceResult, unsigned int Input1, @@ -295,22 +342,27 @@ void testVadd4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vadd4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vadd4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vadd4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vadd4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vaddss2(unsigned int *const DeviceResult, unsigned int Input1, @@ -324,22 +376,27 @@ void testVaddss2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vaddss2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vaddss2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vaddss2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vaddss2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vaddss4(unsigned int *const DeviceResult, unsigned int Input1, @@ -353,22 +410,27 @@ void testVaddss4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vaddss4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vaddss4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vaddss4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vaddss4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vaddus2(unsigned int *const DeviceResult, unsigned int Input1, @@ -382,22 +444,27 @@ void testVaddus2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vaddus2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vaddus2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vaddus2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vaddus2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vaddus4(unsigned int *const DeviceResult, unsigned int Input1, @@ -411,22 +478,27 @@ void testVaddus4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vaddus4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vaddus4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vaddus4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vaddus4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vavgs2(unsigned int *const DeviceResult, unsigned int Input1, @@ -440,22 +512,27 @@ void testVavgs2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vavgs2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vavgs2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vavgs2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vavgs2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vavgs4(unsigned int *const DeviceResult, unsigned int Input1, @@ -469,22 +546,27 @@ void testVavgs4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vavgs4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vavgs4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vavgs4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vavgs4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vavgu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -498,22 +580,27 @@ void testVavgu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vavgu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vavgu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vavgu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vavgu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vavgu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -527,22 +614,27 @@ void testVavgu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vavgu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vavgu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vavgu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vavgu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpeq2(unsigned int *const DeviceResult, unsigned int Input1, @@ -556,22 +648,27 @@ void testVcmpeq2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpeq2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpeq2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpeq2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpeq2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpeq4(unsigned int *const DeviceResult, unsigned int Input1, @@ -585,22 +682,27 @@ void testVcmpeq4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpeq4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpeq4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpeq4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpeq4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpges2(unsigned int *const DeviceResult, unsigned int Input1, @@ -614,22 +716,27 @@ void testVcmpges2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpges2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpges2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpges2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpges2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpges4(unsigned int *const DeviceResult, unsigned int Input1, @@ -643,22 +750,27 @@ void testVcmpges4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpges4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpges4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpges4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpges4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgeu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -672,22 +784,27 @@ void testVcmpgeu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgeu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgeu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgeu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgeu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgeu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -701,22 +818,27 @@ void testVcmpgeu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgeu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgeu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgeu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgeu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgts2(unsigned int *const DeviceResult, unsigned int Input1, @@ -730,22 +852,27 @@ void testVcmpgts2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgts2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgts2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgts2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgts2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgts4(unsigned int *const DeviceResult, unsigned int Input1, @@ -759,22 +886,27 @@ void testVcmpgts4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgts4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgts4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgts4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgts4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgtu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -788,22 +920,27 @@ void testVcmpgtu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgtu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgtu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgtu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgtu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpgtu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -817,22 +954,27 @@ void testVcmpgtu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpgtu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpgtu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpgtu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpgtu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmples2(unsigned int *const DeviceResult, unsigned int Input1, @@ -846,22 +988,27 @@ void testVcmples2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmples2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmples2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmples2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmples2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmples4(unsigned int *const DeviceResult, unsigned int Input1, @@ -875,22 +1022,27 @@ void testVcmples4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmples4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmples4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmples4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmples4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpleu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -904,22 +1056,27 @@ void testVcmpleu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpleu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpleu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpleu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpleu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpleu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -933,22 +1090,27 @@ void testVcmpleu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpleu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpleu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpleu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpleu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmplts2(unsigned int *const DeviceResult, unsigned int Input1, @@ -962,22 +1124,27 @@ void testVcmplts2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmplts2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmplts2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmplts2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmplts2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmplts4(unsigned int *const DeviceResult, unsigned int Input1, @@ -991,22 +1158,27 @@ void testVcmplts4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmplts4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmplts4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmplts4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmplts4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpltu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1020,22 +1192,27 @@ void testVcmpltu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpltu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpltu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpltu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpltu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpltu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1049,22 +1226,27 @@ void testVcmpltu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpltu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpltu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpltu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpltu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpne2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1078,22 +1260,27 @@ void testVcmpne2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpne2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpne2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpne2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpne2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vcmpne4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1107,22 +1294,27 @@ void testVcmpne4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vcmpne4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vcmpne4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vcmpne4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vcmpne4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vhaddu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1136,22 +1328,27 @@ void testVhaddu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vhaddu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vhaddu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vhaddu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vhaddu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vhaddu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1165,22 +1362,27 @@ void testVhaddu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vhaddu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vhaddu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vhaddu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vhaddu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmaxs2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1194,22 +1396,27 @@ void testVmaxs2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmaxs2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmaxs2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmaxs2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmaxs2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmaxs4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1223,22 +1430,27 @@ void testVmaxs4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmaxs4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmaxs4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmaxs4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmaxs4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmaxu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1252,22 +1464,27 @@ void testVmaxu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmaxu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmaxu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmaxu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmaxu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmaxu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1281,22 +1498,27 @@ void testVmaxu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmaxu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmaxu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmaxu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmaxu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmins2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1310,22 +1532,27 @@ void testVmins2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmins2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmins2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmins2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmins2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vmins4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1339,22 +1566,27 @@ void testVmins4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vmins4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vmins4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vmins4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vmins4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vminu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1368,22 +1600,27 @@ void testVminu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vminu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vminu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vminu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vminu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vminu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1397,22 +1634,27 @@ void testVminu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vminu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vminu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vminu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vminu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vneg2(unsigned int *const DeviceResult, unsigned int Input1) { @@ -1425,19 +1667,24 @@ void testVneg2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vneg2(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vneg2", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vneg2(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vneg2", {TestCase.first}, TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vneg4(unsigned int *const DeviceResult, unsigned int Input1) { @@ -1450,19 +1697,24 @@ void testVneg4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vneg4(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vneg4", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vneg4(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vneg4", {TestCase.first}, TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vnegss2(unsigned int *const DeviceResult, unsigned int Input1) { @@ -1476,19 +1728,25 @@ void testVnegss2Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vnegss2(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vnegss2", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vnegss2(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vnegss2", {TestCase.first}, TestCase.second, + *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vnegss4(unsigned int *const DeviceResult, unsigned int Input1) { @@ -1502,19 +1760,25 @@ void testVnegss4Cases( unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_ct1 = TestCase.first; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_ct1 = TestCase.first; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vnegss4(DeviceResult, TestCase_first_ct1); - }); - }); - q_ct1.wait(); - checkResult("__vnegss4", {TestCase.first}, TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vnegss4(DeviceResult, TestCase_first_ct1); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vnegss4", {TestCase.first}, TestCase.second, + *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsads2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1528,22 +1792,27 @@ void testVsads2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsads2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsads2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsads2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsads2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsads4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1557,22 +1826,27 @@ void testVsads4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsads4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsads4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsads4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsads4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsadu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1586,22 +1860,27 @@ void testVsadu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsadu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsadu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsadu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsadu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsadu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1615,22 +1894,27 @@ void testVsadu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsadu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsadu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsadu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsadu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vseteq2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1644,22 +1928,27 @@ void testVseteq2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vseteq2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vseteq2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vseteq2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vseteq2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vseteq4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1673,22 +1962,27 @@ void testVseteq4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vseteq4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vseteq4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vseteq4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vseteq4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetges2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1702,22 +1996,27 @@ void testVsetges2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetges2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetges2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetges2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetges2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetges4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1731,22 +2030,27 @@ void testVsetges4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetges4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetges4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetges4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetges4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgeu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1760,22 +2064,27 @@ void testVsetgeu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgeu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgeu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgeu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgeu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgeu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1789,22 +2098,27 @@ void testVsetgeu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgeu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgeu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgeu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgeu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgts2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1818,22 +2132,27 @@ void testVsetgts2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgts2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgts2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgts2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgts2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgts4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1847,22 +2166,27 @@ void testVsetgts4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgts4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgts4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgts4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgts4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgtu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1876,22 +2200,27 @@ void testVsetgtu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgtu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgtu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgtu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgtu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetgtu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1905,22 +2234,27 @@ void testVsetgtu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetgtu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetgtu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetgtu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetgtu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetles2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1934,22 +2268,27 @@ void testVsetles2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetles2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetles2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetles2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetles2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetles4(unsigned int *const DeviceResult, unsigned int Input1, @@ -1963,22 +2302,27 @@ void testVsetles4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetles4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetles4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetles4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetles4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetleu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -1992,22 +2336,27 @@ void testVsetleu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetleu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetleu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetleu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetleu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetleu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2021,22 +2370,27 @@ void testVsetleu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetleu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetleu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetleu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetleu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetlts2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2050,22 +2404,27 @@ void testVsetlts2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetlts2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetlts2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetlts2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetlts2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetlts4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2079,22 +2438,27 @@ void testVsetlts4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetlts4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetlts4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetlts4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetlts4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetltu2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2108,22 +2472,27 @@ void testVsetltu2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetltu2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetltu2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetltu2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetltu2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetltu4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2137,22 +2506,27 @@ void testVsetltu4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetltu4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetltu4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetltu4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetltu4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetne2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2166,22 +2540,27 @@ void testVsetne2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetne2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetne2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetne2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetne2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsetne4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2195,22 +2574,27 @@ void testVsetne4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsetne4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsetne4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsetne4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsetne4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsub2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2224,22 +2608,27 @@ void testVsub2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsub2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsub2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsub2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsub2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsub4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2253,22 +2642,27 @@ void testVsub4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsub4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsub4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsub4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsub4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsubss2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2282,22 +2676,27 @@ void testVsubss2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsubss2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsubss2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsubss2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsubss2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsubss4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2311,22 +2710,27 @@ void testVsubss4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsubss4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsubss4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsubss4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsubss4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsubus2(unsigned int *const DeviceResult, unsigned int Input1, @@ -2340,22 +2744,27 @@ void testVsubus2Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsubus2(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsubus2", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsubus2(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsubus2", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } void vsubus4(unsigned int *const DeviceResult, unsigned int Input1, @@ -2369,22 +2778,27 @@ void testVsubus4Cases(const vector> &TestCases) { unsigned int *DeviceResult; DeviceResult = (unsigned int *)sycl::malloc_shared(sizeof(*DeviceResult), q_ct1); - for (const auto &TestCase : TestCases) { - q_ct1.submit([&](sycl::handler &cgh) { - auto TestCase_first_first_ct1 = TestCase.first.first; - auto TestCase_first_second_ct2 = TestCase.first.second; + try { + for (const auto &TestCase : TestCases) { + q_ct1.submit([&](sycl::handler &cgh) { + auto TestCase_first_first_ct1 = TestCase.first.first; + auto TestCase_first_second_ct2 = TestCase.first.second; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - vsubus4(DeviceResult, TestCase_first_first_ct1, - TestCase_first_second_ct2); - }); - }); - q_ct1.wait(); - checkResult("__vsubus4", {TestCase.first.first, TestCase.first.second}, - TestCase.second, *DeviceResult); + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + vsubus4(DeviceResult, TestCase_first_first_ct1, + TestCase_first_second_ct2); + }); + }); + q_ct1.wait_and_throw(); + checkResult("__vsubus4", {TestCase.first.first, TestCase.first.second}, + TestCase.second, *DeviceResult); + } + } catch (...) { + // Intentionally left empty to make sure allocated memory can be freed. 
} + sycl::free(DeviceResult, q_ct1); } int main() { diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index b0b0629cffbd6..c0a02a874b962 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -68,10 +68,10 @@ add_lit_testsuite(check-sycl-deploy "Running the SYCL regression tests" set_target_properties(check-sycl-deploy PROPERTIES FOLDER "SYCL tests") set(TRIPLES "spir64-unknown-unknown") -if (SYCL_BUILD_BACKEND_CUDA OR (SYCL_BUILD_BACKEND_HIP AND "${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA")) +if (SYCL_BUILD_BACKEND_CUDA) set(TRIPLES "${TRIPLES},nvptx64-nvidia-cuda") endif() -if ((SYCL_BUILD_BACKEND_HIP AND "${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD")) +if (SYCL_BUILD_BACKEND_HIP) set(TRIPLES "${TRIPLES},amdgcn-amd-amdhsa") endif() diff --git a/sycl/test/basic_tests/macros.cpp b/sycl/test/basic_tests/macros.cpp index 9eb09d27b1b9e..e9163d4677606 100644 --- a/sycl/test/basic_tests/macros.cpp +++ b/sycl/test/basic_tests/macros.cpp @@ -1,12 +1,12 @@ // RUN: %clangxx %fsycl-host-only -dM -E %s -o %t.host // RUN: %clangxx -fsycl -fsycl-targets=spir64-unknown-unknown -fsycl-device-only -dM -E %s -o %t.device.spirv // RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fsycl-device-only -dM -E %s -o %t.device.cuda %} -// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-only -dM -E %s -o %t.device.hip %} +// RUN: %if hip %{ %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-only -dM -E %s -o %t.device.hip %} // // RUN: FileCheck %s < %t.host --check-prefixes=COMMON --implicit-check-not=__SPIRV // RUN: FileCheck %s < %t.device.spirv --check-prefixes=DEVICE,COMMON --implicit-check-not=__SPIRV // RUN: %if cuda %{ FileCheck %s < %t.device.cuda --check-prefixes=DEVICE,COMMON --implicit-check-not=__SPIRV %} -// RUN: %if hip_amd %{ FileCheck %s < %t.device.hip --check-prefixes=DEVICE,COMMON --implicit-check-not=__SPIRV %} +// RUN: %if hip %{ FileCheck %s < %t.device.hip 
--check-prefixes=DEVICE,COMMON --implicit-check-not=__SPIRV %} // // FIXME: we should also check that we don't leak __SYCL* and SYCL* macro from // our header files. diff --git a/sycl/test/basic_tests/macros_no_rdc.cpp b/sycl/test/basic_tests/macros_no_rdc.cpp index aed25568e6a6e..22a48012a9006 100644 --- a/sycl/test/basic_tests/macros_no_rdc.cpp +++ b/sycl/test/basic_tests/macros_no_rdc.cpp @@ -2,17 +2,17 @@ // RUN: %clangxx %fsycl-host-only -fno-sycl-rdc -E -dD %s -o %t.host // RUN: %clangxx -fsycl -fsycl-targets=spir64-unknown-unknown -fsycl-device-only -E -dD -fno-sycl-rdc %s -o %t.device.spirv // RUN: %if cuda %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fsycl-device-only -E -dD -fno-sycl-rdc %s -o %t.device.cuda %} -// RUN: %if hip_amd %{ %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-only -E -dD -fno-sycl-rdc %s -o %t.device.hip %} +// RUN: %if hip %{ %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-only -E -dD -fno-sycl-rdc %s -o %t.device.hip %} // // RUN: FileCheck --match-full-lines %s < %t.host --check-prefixes=HOST // RUN: FileCheck --match-full-lines %s < %t.device.spirv --check-prefixes=DEVICE-FULL-LINE --implicit-check-not="#define SYCL_EXTERNAL" // RUN: %if cuda %{ FileCheck --match-full-lines %s < %t.device.cuda --check-prefixes=DEVICE-FULL-LINE --implicit-check-not="#define SYCL_EXTERNAL" %} -// RUN: %if hip_amd %{ FileCheck --match-full-lines %s < %t.device.hip --check-prefixes=DEVICE-FULL-LINE --implicit-check-not="#define SYCL_EXTERNAL" %} +// RUN: %if hip %{ FileCheck --match-full-lines %s < %t.device.hip --check-prefixes=DEVICE-FULL-LINE --implicit-check-not="#define SYCL_EXTERNAL" %} // // Remove __DPCPP_SYCL_EXTERNAL to simplify regex for DEVICE prefix // RUN: sed 's|__DPCPP_SYCL_EXTERNAL||g' %t.device.spirv | FileCheck %s --check-prefixes=DEVICE // RUN: %if cuda %{ sed 's|__DPCPP_SYCL_EXTERNAL||g' %t.device.cuda | FileCheck %s --check-prefixes=DEVICE %} -// RUN: %if hip_amd %{ sed 
's|__DPCPP_SYCL_EXTERNAL||g' %t.device.hip | FileCheck %s --check-prefixes=DEVICE %} +// RUN: %if hip %{ sed 's|__DPCPP_SYCL_EXTERNAL||g' %t.device.hip | FileCheck %s --check-prefixes=DEVICE %} // RUN: // // With -fno-sycl-rdc, device code should not define or use SYCL_EXTERNAL diff --git a/sycl/test/check_device_code/atomic_ref.cpp b/sycl/test/check_device_code/atomic_ref.cpp index 79b12590e1a5d..648ae01ddb192 100644 --- a/sycl/test/check_device_code/atomic_ref.cpp +++ b/sycl/test/check_device_code/atomic_ref.cpp @@ -6,8 +6,8 @@ // CHECK-LABEL: define dso_local spir_func noundef i32 @_Z17atomic_ref_globalRi( // CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) noundef align 4 dereferenceable(4) [[I]], i32 noundef 5) #[[ATTR3:[0-9]+]] -// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[CALL_I_I_I_I_I_I]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1) +// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret i32 [[CALL3_I_I]] // SYCL_EXTERNAL auto atomic_ref_global(int &i) { diff --git a/sycl/test/check_device_code/extensions/address_cast.cpp b/sycl/test/check_device_code/extensions/address_cast.cpp index bc401e3c72fce..6b3bacc2c424d 100644 --- a/sycl/test/check_device_code/extensions/address_cast.cpp +++ b/sycl/test/check_device_code/extensions/address_cast.cpp @@ 
-13,29 +13,29 @@ using namespace sycl::ext::oneapi::experimental; namespace static_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA12:![0-9]+]], !alias.scope [[META14:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(1) +// CHECK-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA12:![0-9]+]], !alias.scope [[META14:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast23to_global_not_decoratedEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") 
align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META19:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] -// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA20:![0-9]+]], !alias.scope [[META22:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1) +// CHECK-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA20:![0-9]+]], !alias.scope [[META22:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META25:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: store i64 
[[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META26:![0-9]+]] @@ -45,7 +45,7 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast24to_generic_not_decoratedEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META32:![0-9]+]] // CHECK-NEXT: ret void @@ -55,7 +55,7 @@ SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast16to_global_deviceEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4]] !srcloc [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(5) // CHECK-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA36:![0-9]+]], !alias.scope [[META38:![0-9]+]] @@ -66,7 +66,7 @@ SYCL_EXTERNAL auto 
to_global_device(int *p) { } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast14to_global_hostEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4]] !srcloc [[META41:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(6) // CHECK-NEXT: store ptr addrspace(6) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA42:![0-9]+]], !alias.scope [[META44:![0-9]+]] @@ -79,11 +79,11 @@ SYCL_EXTERNAL auto to_global_host(int *p) { namespace dynamic_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META47:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef ptr 
addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA12]], !alias.scope [[META48:![0-9]+]] // CHECK-NEXT: ret void // @@ -91,9 +91,9 @@ SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast23to_global_not_decoratedEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META53:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA20]], !alias.scope [[META54:![0-9]+]] // CHECK-NEXT: ret void // @@ -101,7 +101,7 @@ SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void 
@_ZN15dynamic_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR3]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META58:![0-9]+]] @@ -111,7 +111,7 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast24to_generic_not_decoratedEPi( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA30]], !alias.scope [[META62:![0-9]+]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/native_cpu/vectorization.cpp 
b/sycl/test/check_device_code/native_cpu/vectorization.cpp index 12b8a21cc069e..a9f571c9225db 100644 --- a/sycl/test/check_device_code/native_cpu/vectorization.cpp +++ b/sycl/test/check_device_code/native_cpu/vectorization.cpp @@ -18,9 +18,9 @@ int main() { sycl::range<1> r(1); deviceQueue.submit([&](sycl::handler &h) { h.parallel_for(r, [=](sycl::id<1> id) { acc[id[0]] = 42; }); - // CHECK-DEFAULT: store <8 x i32> - // CHECK-16: store <16 x i32> - // CHECK-4: store <4 x i32> + // CHECK-DEFAULT: store <8 x i32> splat (i32 42) + // CHECK-16: store <16 x i32> splat (i32 42) + // CHECK-4: store <4 x i32> splat (i32 42) // CHECK-O0: store i32 42 // CHECK-O0-NOT: store <{{.*}}> // CHECK-DISABLE: store i32 42 diff --git a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp index 1de08c8f0c495..2d42dd7f1fae9 100644 --- a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp +++ b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp @@ -54,7 +54,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 415 +// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 414 // // List of improperly UNSUPPORTED tests. // Remove the CHECK once the test has been properly UNSUPPORTED. 
@@ -62,7 +62,6 @@ // CHECK: AOT/early_aot.cpp // CHECK-NEXT: AOT/gpu.cpp // CHECK-NEXT: AOT/multiple-devices.cpp -// CHECK-NEXT: Adapters/enqueue-arg-order-buffer.cpp // CHECK-NEXT: Adapters/enqueue-arg-order-image.cpp // CHECK-NEXT: Adapters/enqueue-arg-order-image.cpp // CHECK-NEXT: Adapters/interop-l0-direct.cpp diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 788d9ab37a9e3..089395d5c1400 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -168,7 +168,7 @@ if "amdgcn-amd-amdhsa" in triple: llvm_config.with_system_environment("ROCM_PATH") - config.available_features.add("hip_amd") + config.available_features.add("hip") # For AMD the specific GPU has to be specified with --offload-arch if not any([f.startswith("--offload-arch") for f in additional_flags]): # If the offload arch wasn't specified in SYCL_CLANG_EXTRA_FLAGS,