Skip to content

Commit

Permalink
Use the block heuristic when determining a launch configuration.
Browse files: browse the repository at this point in the history.
  • Loading branch information
maleadt committed Jan 21, 2022
1 parent 4cdb50b commit 3ee97c9
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/device/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ end
function launch_configuration(backend::AbstractGPUBackend, heuristic;
elements::Int, elements_per_thread::Int)
threads = clamp(elements, 1, heuristic.threads)
blocks = max(cld(elements, threads), 1)
blocks = clamp(cld(elements, threads), elements, heuristic.blocks)
threads = cld(elements, blocks)

if elements_per_thread > 1 && blocks > heuristic.blocks
# we want to launch more blocks than required, so prefer a grid-stride loop instead
Expand Down
12 changes: 8 additions & 4 deletions test/testsuite/gpuinterface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,28 @@
x = AT(Vector{Int}(undef, N))
x .= 0
gpu_call(x) do ctx, x
x[linear_index(ctx)] = 2
i = @linearidx x
x[i] = 2
return
end
@test all(x-> x == 2, Array(x))

gpu_call(x; elements=N) do ctx, x
x[linear_index(ctx)] = 2
i = @linearidx x
x[i] = 2
return
end
@test all(x-> x == 2, Array(x))
gpu_call(x; threads=2, blocks=(N ÷ 2)) do ctx, x
x[linear_index(ctx)] = threadidx(ctx)
i = @linearidx x
x[i] = threadidx(ctx)
return
end
@test Array(x) == [1,2,1,2,1,2,1,2,1,2]

gpu_call(x; threads=2, blocks=(N ÷ 2)) do ctx, x
x[linear_index(ctx)] = blockidx(ctx)
i = @linearidx x
x[i] = blockidx(ctx)
return
end
@test Array(x) == [1, 1, 2, 2, 3, 3, 4, 4, 5, 5]
Expand Down

0 comments on commit 3ee97c9

Please sign in to comment.