Skip to content

Commit

Permalink
adding atomics
Browse files Browse the repository at this point in the history
  • Loading branch information
leios committed Jun 13, 2022
1 parent d52a6f3 commit 3626b03
Show file tree
Hide file tree
Showing 6 changed files with 475 additions and 0 deletions.
26 changes: 26 additions & 0 deletions lib/CUDAKernels/src/CUDAKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ else
end

import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size
import KernelAbstractions: atomic_add!, atomic_and!, atomic_cas!, atomic_dec!, atomic_inc!, atomic_max!, atomic_min!, atomic_op!, atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!

###
# GPU implementation of shared memory
Expand Down Expand Up @@ -395,4 +396,29 @@ Adapt.adapt_storage(to::ConstAdaptor, a::CUDA.CuDeviceArray) = Base.Experimental
# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CUDADevice}, arg) = CUDA.cudaconvert(arg)


###
# GPU implementation of atomics
###

# Lookup table pairing each KernelAbstractions atomic function (imported
# above) with the CUDA function of the same name. It is consumed by the loop
# that follows, which installs the device-side overrides.
# NOTE(review): assumes every `CUDA.atomic_*!` name listed here exists in the
# CUDA.jl version in use (in particular `CUDA.atomic_op!`) — confirm, since a
# missing name would fail when this Dict is constructed.
afxs = Dict(
atomic_add! => CUDA.atomic_add!,
atomic_and! => CUDA.atomic_and!,
atomic_cas! => CUDA.atomic_cas!,
atomic_dec! => CUDA.atomic_dec!,
atomic_inc! => CUDA.atomic_inc!,
atomic_max! => CUDA.atomic_max!,
atomic_min! => CUDA.atomic_min!,
atomic_op! => CUDA.atomic_op!,
atomic_or! => CUDA.atomic_or!,
atomic_sub! => CUDA.atomic_sub!,
atomic_xchg! => CUDA.atomic_xchg!,
atomic_xor! => CUDA.atomic_xor!
)

# Install one device override per atomic listed in `afxs`.
#
# A plain `function afx(args...)` inside the loop would define a method on a
# function literally called `afx` rather than on the function bound to the
# loop variable, so none of the atomics would be overridden. The definition
# is therefore generated with `@eval`, splicing in the *name* of the
# KernelAbstractions function (already imported into this module, so the
# definition extends it) and the CUDA function object it forwards to.
for (afx, cfx) in afxs
    kaname = nameof(afx)
    @eval @device_override @inline function $kaname(args...)
        return $cfx(args...)
    end
end
end
4 changes: 4 additions & 0 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,10 @@ include("extras/extras.jl")

include("reflection.jl")

# Atomics

include("atomics.jl")

# CPU backend

include("cpu.jl")
Expand Down
203 changes: 203 additions & 0 deletions src/atomics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
###
# Atomics
###

export atomic_add!, atomic_sub!, atomic_and!, atomic_or!, atomic_xor!,
atomic_min!, atomic_max!, atomic_inc!, atomic_dec!, atomic_xchg!,
atomic_op!, atomic_cas!

# helper functions for inc(rement) and dec(rement)
# Wrapping decrement: yields `b` when `a` is zero or already above `b`,
# otherwise `a - 1` (computed in type `T`).
function dec(a::T, b::T) where T
    if (a == 0) | (a > b)
        return b
    end
    return a - T(1)
end

# Wrapping increment: yields zero once `a` has reached (or passed) `b`,
# otherwise `a + 1` (computed in type `T`).
function inc(a::T, b::T) where T
    a >= b && return T(0)
    return a + T(1)
end

# Table of read-modify-write atomics: maps the name of each generated
# `atomic_*!` function to the binary operation applied to `(old, val)`.
# Covers arithmetic, bitwise, min/max, and the wrapping inc/dec helpers.
const ops = Dict(
    :atomic_add! => +,
    :atomic_sub! => -,
    :atomic_and! => &,
    :atomic_or! => |,
    # `xor` is the ASCII spelling of the `⊻` operator; the entry previously
    # had no right-hand side at all, which does not parse.
    :atomic_xor! => xor,
    :atomic_min! => min,
    :atomic_max! => max,
    :atomic_inc! => inc,
    :atomic_dec! => dec,
)

# Generate one CPU method per entry of `ops`.
#
# Note: both arguments share the type parameter `T`, which prevents implicit
# type conversion (for example, Float32 -> Float64). Passing a `b` of a
# different but compatible type therefore raises a MethodError instead of
# being converted.
for (name, op) in ops
    @eval @inline function $name(ptr::Ptr{T}, b::T) where T
        # `atomic_pointermodify` yields the pair `old => new`; the documented
        # contract of these functions is to return only `old`.
        return first(Core.Intrinsics.atomic_pointermodify(ptr, $op, b, :monotonic))
    end
end

"""
    atomic_cas!(ptr::Ptr{T}, cmp::T, val::T)

Atomic Compare And Swap (CAS).

Read the value `old` located at address `ptr` and compare it with `cmp`.
If `old` equals `cmp`, store `val` at the same address; otherwise leave the
stored value unchanged. These operations are performed in one atomic
transaction. The function returns `old`.

On the CPU the exchange uses `:acquire_release` ordering on success and
`:monotonic` ordering on failure.

This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Additionally, on GPU hardware with compute capability 7.0+, values of type
UInt16 are supported.

Also: atomic operations for the CPU require Julia 1.7.0 or above.
"""
function atomic_cas!(ptr::Ptr{T}, cmp::T, val::T) where T
    # `atomic_pointerreplace` yields `(old, success)`; only `old` is part of
    # the documented contract, so the success flag is dropped here.
    result = Core.Intrinsics.atomic_pointerreplace(
        ptr, cmp, val, :acquire_release, :monotonic
    )
    return result.old
end

"""
    atomic_xchg!(ptr::Ptr{T}, val::T)

Atomic exchange.

Read the value `old` located at address `ptr` and store `val` at the same
address, as a single atomic transaction. The function returns `old`.

This operation is supported for values of type Int32, Int64, UInt32 and UInt64.

Also: atomic operations for the CPU require Julia 1.7.0 or above.
"""
function atomic_xchg!(ptr::Ptr{T}, b::T) where T
    previous = Core.Intrinsics.atomic_pointerswap(ptr, b, :monotonic)
    return previous
end

"""
    atomic_op!(ptr::Ptr{T}, op, val::T)

Arbitrary atomic read-modify-write operation.

Read the value `old` located at address `ptr`, compute `op(old, val)`, and
store the result back at the same address, as a single atomic transaction.
The function returns `old`.

This function is somewhat experimental.

Also: atomic operations for the CPU require Julia 1.7.0 or above.
"""
function atomic_op!(ptr::Ptr{T}, op, b::T) where T
    # `atomic_pointermodify` yields the pair `old => new`; only `old` is
    # returned, matching the documented contract and the other atomics.
    return first(Core.Intrinsics.atomic_pointermodify(ptr, op, b, :monotonic))
end

# Other Documentation

"""
atomic_add!(ptr::Ptr{T}, val::T)
This is an atomic addition.
It reads the value `old` located at address `ptr`, computes `old + val`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32, UInt64, and Float32.
Additionally, on GPU hardware with compute capability 6.0+, values of type Float64 are supported.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_add!

"""
atomic_sub!(ptr::Ptr{T}, val::T)
This is an atomic subtraction.
It reads the value `old` located at address `ptr`, computes `old - val`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_sub!

"""
atomic_and!(ptr::Ptr{T}, val::T)
This is an atomic and.
It reads the value `old` located at address `ptr`, computes `old & val`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_and!

"""
atomic_or!(ptr::Ptr{T}, val::T)
This is an atomic or.
It reads the value `old` located at address `ptr`, computes `old | val`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_or!

"""
atomic_xor!(ptr::Ptr{T}, val::T)
This is an atomic xor.
It reads the value `old` located at address `ptr`, computes `old ⊻ val`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_xor!

"""
atomic_min!(ptr::Ptr{T}, val::T)
This is an atomic min.
It reads the value `old` located at address `ptr`, computes `min(old, val)`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_min!

"""
atomic_max!(ptr::Ptr{T}, val::T)
This is an atomic max.
It reads the value `old` located at address `ptr`, computes `max(old, val)`, and stores the result back to memory at the same address.
These operations are performed in one atomic transaction.
The function returns `old`.
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_max!

"""
atomic_inc!(ptr::Ptr{T}, val::T)
This is an atomic increment function that counts up to a certain number before starting again at 0.
It reads the value `old` located at address `ptr`, computes `((old >= val) ? 0 : (old+1))`, and stores the result back to memory at the same address.
These three operations are performed in one atomic transaction.
The function returns `old`.
This operation is only supported for values of type Int32.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_inc!

"""
atomic_dec!(ptr::Ptr{T}, val::T)
This is an atomic decrement function that counts down to 0 from a defined value `val`.
It reads the value `old` located at address `ptr`, computes `(((old == 0) | (old > val)) ? val : (old-1))`, and stores the result back to memory at the same address.
These three operations are performed in one atomic transaction.
The function returns `old`.
This operation is only supported for values of type Int32.
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
"""
atomic_dec!
27 changes: 27 additions & 0 deletions src/cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,30 @@ end

# Argument conversion
KernelAbstractions.argconvert(k::Kernel{CPU}, arg) = arg

###
# CPU error handling if under 1.7
###

# On Julia versions before 1.7 the CPU atomics are unavailable, so every
# atomic function gets a catch-all method that raises a descriptive error.
if Base.VERSION < v"1.7.0"

    import KernelAbstractions: atomic_add!, atomic_and!, atomic_cas!,
                               atomic_dec!, atomic_inc!, atomic_max!,
                               atomic_min!, atomic_op!, atomic_or!,
                               atomic_sub!, atomic_xchg!, atomic_xor!

    # Shared error path; the arguments are accepted only so that any call
    # shape can be forwarded here unchanged.
    function atomic_error(args...)
        error("CPU Atomics are not allowed for julia version under 1.7!")
    end

    afxs = [atomic_add!, atomic_and!, atomic_cas!, atomic_dec!,
            atomic_inc!, atomic_max!, atomic_min!, atomic_op!,
            atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!]

    # A plain `function afx(...)` in the loop body would define a new local
    # function named `afx` instead of adding a method to the atomic held by
    # the loop variable, and the previous body referenced an undefined
    # `args`. The method is therefore generated with `@eval` using the
    # function's name and a varargs signature.
    # NOTE(review): the `Ptr{T}` methods defined in atomics.jl are more
    # specific than this varargs fallback and will still be selected for
    # pointer arguments — confirm whether those should be guarded as well.
    for afx in afxs
        fname = nameof(afx)
        @eval @inline function $fname(args...)
            return atomic_error(args...)
        end
    end
end

Loading

0 comments on commit 3626b03

Please sign in to comment.