Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ET-VK] Adding a common utility function to calculate 3d output position based on unique index. #7522

Open
wants to merge 6 commits into
base: gh/trivedivivek/37/base
Choose a base branch
from
7 changes: 2 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#define op(X, A, B) ${OPERATOR}

#include "indexing_utils.h"
#include "indexing_utils_u16.h"

layout(std430) buffer;

Expand All @@ -35,10 +35,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
* output at a single output location.
*/
void main() {
const ivec3 pos = ivec3(
gl_GlobalInvocationID.x % out_limits.x,
(gl_GlobalInvocationID.x / out_limits.x) % out_limits.y,
gl_GlobalInvocationID.x / (out_limits.x * out_limits.y));
const ivec3 pos = idx_to_u16pos_x_wise(gl_GlobalInvocationID.x, out_limits.x, out_limits.y);

if (any(greaterThanEqual(pos, out_limits))) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#define op(X, A, B) ${OPERATOR}

#include "indexing_utils.h"
#include "indexing_utils_u16.h"

layout(std430) buffer;

Expand All @@ -43,12 +43,9 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
void main() {
// y divided up by batch size is used to determine 3d position
// since work size is calculated by x * ((y + B_Y - 1) / B_Y) * z
const uint out_limits_y_scaled = (out_limits.y + BATCH_SIZE_Y - 1) / BATCH_SIZE_Y;
const int out_limits_y_scaled = (out_limits.y + BATCH_SIZE_Y - 1) / BATCH_SIZE_Y;

u16vec3 pos = u16vec3(
gl_GlobalInvocationID.x % out_limits.x,
((gl_GlobalInvocationID.x / out_limits.x) % out_limits_y_scaled),
gl_GlobalInvocationID.x / (out_limits.x * out_limits_y_scaled));
u16vec3 pos = idx_to_u16pos_x_wise(gl_GlobalInvocationID.x, out_limits.x, out_limits_y_scaled);

// scale pos.y by batch size, because that's the top pixel to be processed
pos.y *= uint16_t(BATCH_SIZE_Y);
Expand Down
10 changes: 3 additions & 7 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#define op(X, A, B) ${OPERATOR}

#include "indexing_utils.h"
#include "indexing_utils_u16.h"

layout(std430) buffer;

Expand All @@ -43,13 +43,10 @@ shared u16vec2 pos_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroup
* size is only 1x1, making it easier to re-use loaded texels from t_kernel.
*/
void main() {
const uvec2 out_limits_scaled = (out_limits.xy + TILE_SIZE - 1) / TILE_SIZE;
const ivec2 out_limits_scaled = (out_limits.xy + TILE_SIZE - 1) / TILE_SIZE;
const uint shared_mem_stride = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;

const u16vec3 gpos = u16vec3(
gl_GlobalInvocationID.x % out_limits_scaled.x,
(gl_GlobalInvocationID.x / out_limits_scaled.x) % out_limits_scaled.y,
gl_GlobalInvocationID.x / (out_limits_scaled.x * out_limits_scaled.y));
const u16vec3 gpos = idx_to_u16pos_x_wise(gl_GlobalInvocationID.x, out_limits_scaled.x, out_limits_scaled.y);

// Output position for TILE_SIZE = 2
// +--------+--------+
Expand Down Expand Up @@ -98,7 +95,6 @@ void main() {
const vec4 ktex_2 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(2, 0));
const vec4 ktex_3 = texelFetchOffset(t_kernel, u16vec2(z, gpos.z), 0, u16vec2(3, 0));


#pragma unroll
for (int i = 0; i < TILE_SIZE * TILE_SIZE; ++i) {
const vec4 in_tex = texelFetch(t_in, u16vec3(ipos[i], z4), 0);
Expand Down
19 changes: 19 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/indexing_utils_u16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef INDEXING_UTILS_U16_H
#define INDEXING_UTILS_U16_H

#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require

/*
 * Unflattens a linear index into a 3D position, with x as the fastest-moving
 * axis, then y, then z:
 *
 *   x = idx % size_x
 *   y = (idx / size_x) % size_y
 *   z = (idx / size_x) / size_y
 *
 * idx    - flat index to decompose
 * size_x - extent along x used for the decomposition
 * size_y - extent along y used for the decomposition
 *
 * Returns the (x, y, z) position packed into a u16vec3. Each component is
 * computed as a uint and then converted to 16 bits by the u16vec3 constructor.
 * NOTE(review): presumably callers guarantee size_x and size_y are positive
 * and that every component fits in 16 bits - confirm against call sites.
 */
u16vec3 idx_to_u16pos_x_wise(uint idx, int size_x, int size_y) {
  // Number of complete x-rows that precede idx; shared by the y and z terms.
  const uint row_idx = idx / size_x;

  const uint pos_x = idx % size_x;
  const uint pos_y = row_idx % size_y;
  const uint pos_z = row_idx / size_y;

  return u16vec3(pos_x, pos_y, pos_z);
}

#endif // INDEXING_UTILS_U16_H