Skip to content

Commit

Permalink
Fix bug with size operator call (#81)
Browse files Browse the repository at this point in the history
Fix a bug introduced in the tensor_foreach.h file. The size of a Coord object cannot be calculated by calling size(size); instead, we need to call its product() method.
  • Loading branch information
muhammad-tanvir-1211 authored and aacostadiaz committed Jul 16, 2024
1 parent 618a23a commit fed21e9
Showing 1 changed file with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ struct TensorForEach {
#if defined (CUTLASS_ENABLE_SYCL)
// TODO: query the queue for block size
block_size = 128;
grid_size = (size(size) + block_size - 1) / block_size;
grid_size = (size.product() + block_size - 1) / block_size;
int sm_count = KernelHardwareInfo::query_device_multiprocessor_count();
grid_size = grid_size > sm_count / 2 ? sm_count / 2 : grid_size;
#else
// if grid_size or block_size are zero, query occupancy using the CUDA Occupancy API
cudaError_t result = cudaOccupancyMaxPotentialBlockSize(
Expand All @@ -75,7 +77,7 @@ struct TensorForEach {
#if defined(CUTLASS_ENABLE_SYCL)
const auto sycl_block = syclcompat::dim3(block_size, 1, 1);
const auto sycl_grid = syclcompat::dim3(grid_size, 1, 1);
syclcompat::launch<kernel::TensorForEach<Func, Rank, Params>>(sycl_grid, sycl_block, 0, size, params);
syclcompat::launch<kernel::TensorForEach<Func, Rank, Params>>(sycl_grid, sycl_block, size, params);
#else
dim3 grid(grid_size, 1, 1);
dim3 block(block_size, 1, 1);
Expand Down Expand Up @@ -103,7 +105,7 @@ struct TensorDiagonalForEach {
#if defined(CUTLASS_ENABLE_SYCL)
const auto sycl_block = syclcompat::dim3(block_size, 1, 1);
const auto sycl_grid = syclcompat::dim3((end - start + block_size - 1) / block_size, 1, 1);
syclcompat::launch<kernel::TensorDiagonalForEach<Func, Rank, Params>>(sycl_grid, sycl_block, 0, size, params, start, end);
syclcompat::launch<kernel::TensorDiagonalForEach<Func, Rank, Params>>(sycl_grid, sycl_block, size, params, start, end);
#else
dim3 block(block_size, 1, 1);
dim3 grid((end - start + block_size - 1) / block_size, 1, 1);
Expand Down Expand Up @@ -153,7 +155,7 @@ struct BlockForEach {
#if defined(CUTLASS_ENABLE_SYCL)
const auto sycl_block = syclcompat::dim3(block_size, 1, 1);
const auto sycl_grid = syclcompat::dim3(grid_size, 1, 1);
syclcompat::launch<kernel::BlockForEach<Element, Func>>(sycl_grid, sycl_block, 0, ptr, capacity, params);
syclcompat::launch<kernel::BlockForEach<Element, Func>>(sycl_grid, sycl_block, ptr, capacity, params);
#else
dim3 grid(grid_size, 1, 1);
dim3 block(block_size, 1, 1);
Expand Down

0 comments on commit fed21e9

Please sign in to comment.