From d0693243477a7795efd0c5ae3ce5ed613843d1fd Mon Sep 17 00:00:00 2001 From: Adarsh Yoga Date: Fri, 12 Jan 2024 12:40:04 -0600 Subject: [PATCH] implementation of barrier operations + test cases --- numba_dpex/experimental/__init__.py | 5 +- .../_barriers_overloads.py | 132 ++++++++++++++++++ .../experimental/kernel_iface/__init__.py | 10 +- .../experimental/kernel_iface/barrier.py | 51 +++++++ .../spv_overloads/test_barriers.py | 28 ++++ 5 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py create mode 100644 numba_dpex/experimental/kernel_iface/barrier.py create mode 100644 numba_dpex/tests/experimental/kernel_iface/spv_overloads/test_barriers.py diff --git a/numba_dpex/experimental/__init__.py b/numba_dpex/experimental/__init__.py index e5601283a3..023f553afd 100644 --- a/numba_dpex/experimental/__init__.py +++ b/numba_dpex/experimental/__init__.py @@ -8,7 +8,10 @@ from numba.core.imputils import Registry -from ._kernel_dpcpp_spirv_overloads import _atomic_ref_overloads +from ._kernel_dpcpp_spirv_overloads import ( + _atomic_ref_overloads, + _barriers_overloads, +) from .decorators import device_func, kernel from .kernel_dispatcher import KernelDispatcher from .launcher import call_kernel, call_kernel_async diff --git a/numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py b/numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py new file mode 100644 index 0000000000..ed9acda18f --- /dev/null +++ b/numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Provides overloads for functions included in kernel_iface.barrier that +generate dpcpp SPIR-V LLVM IR intrinsic function calls. +""" +from llvmlite import ir as llvmir +from numba.core import cgutils, types +from numba.extending import intrinsic, overload + +from numba_dpex.core import itanium_mangler as ext_itanium_mangler +from numba_dpex.experimental.kernel_iface import ( + group_barrier, + sub_group_barrier, +) +from numba_dpex.experimental.kernel_iface.memory_enums import ( + MemoryOrder, + MemoryScope, +) +from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME + +from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope + + +def _get_memory_scope(fence_scope): + if isinstance(fence_scope, types.Literal): + return get_scope(fence_scope.literal_value) + return get_scope(fence_scope.value) + + +@intrinsic +def _intrinsic_barrier( + ty_context, # pylint: disable=unused-argument + ty_exec_scope, # pylint: disable=unused-argument + ty_mem_scope, # pylint: disable=unused-argument + ty_spirv_mem_sem_mask, # pylint: disable=unused-argument +): + sig = types.void(types.uint32, types.uint32, types.uint32) + + def _intrinsic_barrier_codegen( + context, builder, sig, args + ): # pylint: disable=unused-argument + fn_name = "__spirv_ControlBarrier" + mangled_fn_name = ext_itanium_mangler.mangle_ext( + fn_name, [types.uint32, types.uint32, types.uint32] + ) + + spirv_fn_arg_types = [ + llvmir.IntType(32), + llvmir.IntType(32), + llvmir.IntType(32), + ] + + fnty = llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types) + + exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32)) + mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32)) + spirv_memory_semantics_mask_arg = builder.trunc( + args[2], llvmir.IntType(32) + ) + + fn_args = [ + exec_scope_arg, + mem_scope_arg, + spirv_memory_semantics_mask_arg, + ] + + fn = cgutils.get_or_insert_function( + builder.module, fnty, mangled_fn_name + ) + + fn.attributes.add("convergent") + fn.attributes.add("nounwind") + fn.calling_convention = "spir_func" + + callinst = builder.call(fn, fn_args) + + callinst.attributes.add("convergent") + callinst.attributes.add("nounwind") + + return ( + sig, + _intrinsic_barrier_codegen, + ) + + +@overload( + group_barrier, + prefer_literal=True, + target=DPEX_KERNEL_EXP_TARGET_NAME, +) +def _ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP): + spirv_memory_semantics_mask = get_memory_semantics_mask( + MemoryOrder.SEQ_CST.value + ) + exec_scope = get_scope(MemoryScope.WORK_GROUP.value) + mem_scope = _get_memory_scope(fence_scope) + + def _ol_group_barrier_impl( + fence_scope=MemoryScope.WORK_GROUP, + ): # pylint: disable=unused-argument + # pylint: disable=no-value-for-parameter + return _intrinsic_barrier( + exec_scope, mem_scope, spirv_memory_semantics_mask + ) + + return _ol_group_barrier_impl + + +@overload( + sub_group_barrier, + prefer_literal=True, + target=DPEX_KERNEL_EXP_TARGET_NAME, +) +def _ol_sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP): + spirv_memory_semantics_mask = get_memory_semantics_mask( + MemoryOrder.SEQ_CST.value + ) + exec_scope = get_scope(MemoryScope.SUB_GROUP.value) + mem_scope = _get_memory_scope(fence_scope) + + def _ol_sub_group_barrier_impl( + fence_scope=MemoryScope.SUB_GROUP, + ): # pylint: disable=unused-argument + # pylint: disable=no-value-for-parameter + return _intrinsic_barrier( + exec_scope, mem_scope, spirv_memory_semantics_mask + ) + + return _ol_sub_group_barrier_impl diff --git a/numba_dpex/experimental/kernel_iface/__init__.py b/numba_dpex/experimental/kernel_iface/__init__.py index 00d1ea4e0a..32d1b28ec5 100644 --- a/numba_dpex/experimental/kernel_iface/__init__.py +++ b/numba_dpex/experimental/kernel_iface/__init__.py @@ -9,6 +9,14 @@ """ from .atomic_ref import AtomicRef +from .barrier import group_barrier, sub_group_barrier from .memory_enums import AddressSpace, MemoryOrder, MemoryScope -__all__ = ["AddressSpace", "AtomicRef", "MemoryOrder", "MemoryScope"] +__all__ = [ + "group_barrier", + "sub_group_barrier", + "AddressSpace", + "AtomicRef", + "MemoryOrder", + "MemoryScope", +] diff --git a/numba_dpex/experimental/kernel_iface/barrier.py b/numba_dpex/experimental/kernel_iface/barrier.py new file mode 100644 index 0000000000..1b7d9089d7 --- /dev/null +++ b/numba_dpex/experimental/kernel_iface/barrier.py @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Python functions that simulate SYCL's barrier primitives. +""" + +from .memory_enums import MemoryScope + + +def group_barrier(fence_scope=MemoryScope.WORK_GROUP): + """Performs a barrier operation across all work-items in a work group. + + The function is modeled after the ``sycl::group_barrier`` function. It + synchronizes work within a group of work items. All the work-items + of the group must execute the barrier construct before any work-item + continues execution beyond the barrier. However, unlike + ``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at + the level of a work group and does not allow specifying the group as an + argument. The :func:`sub_group_barrier` function should be used if + synchronization has to be performed only across a sub-group. + + The ``group_barrier`` performs mem-fence operations ensuring that memory + accesses issued before the barrier are not re-ordered with those issued + after the barrier: all work-items in group g execute a release fence prior + to synchronizing at the barrier, all work-items in group g execute an + acquire fence afterwards, and there is an implicit synchronization of these + fences as if provided by an explicit atomic operation on an atomic object. + + Args: + fence_scope (optional): scope of any memory consistency + operations that are performed by the barrier. + """ + + # TODO: A pure Python simulation of a group_barrier will be added later. + raise NotImplementedError + + +def sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP): + """Performs a barrier operation across all work-items in a sub-group. + + Modeled after ``sycl::group_barrier`` function when invoked on a + sub-group. Refer :func:`group_barrier` for further details. + + Args: + fence_scope (optional): scope of any memory consistency + operations that are performed by the barrier. + """ + + # TODO: A pure Python simulation of a sub_group_barrier will be added later. + raise NotImplementedError diff --git a/numba_dpex/tests/experimental/kernel_iface/spv_overloads/test_barriers.py b/numba_dpex/tests/experimental/kernel_iface/spv_overloads/test_barriers.py new file mode 100644 index 0000000000..3559b49b4c --- /dev/null +++ b/numba_dpex/tests/experimental/kernel_iface/spv_overloads/test_barriers.py @@ -0,0 +1,28 @@ +import dpnp + +import numba_dpex as dpex +import numba_dpex.experimental as dpex_exp +from numba_dpex.experimental.kernel_iface import group_barrier + + +def test_group_barrier(): + """A test for group_barrier function.""" + + @dpex_exp.kernel + def _kernel(a, N): + i = dpex.get_global_id(0) + + a[i] += 1 + group_barrier() + + if i == 0: + for idx in range(1, N): + a[0] += a[idx] + + N = 8196 + a = dpnp.ones(N, dtype=dpnp.int32) + b = dpnp.ones(N, dtype=dpnp.int32) + + dpex_exp.call_kernel(_kernel, dpex.Range(N), a, N) + + assert a[0] == N * 2