From 607659ba5dadd08a1dc12bc0dd095805bd6b8e88 Mon Sep 17 00:00:00 2001 From: RDW Date: Tue, 6 Feb 2024 03:50:42 +0100 Subject: [PATCH 1/2] Client: Update the GPU limits for dynamic uniform buffers 256 bytes is the required alignment for some (many?) nVidia GPUs. In order to ensure the chosen alignment works on those devices, the easiest way is to sync the uniform struct size and the required limit so that both are always identical. This large an increase is fairly wasteful if less would also do, and reduces the number of total widget instances that can be rendered. But it should still be sufficiently large, and there's no getting around the fact that certain devices (mostly nVidia ones) don't support the current limit. --- Core/NativeClient/WebGPU/GPU.lua | 12 +----------- Core/NativeClient/WebGPU/UniformBuffer.lua | 5 +++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Core/NativeClient/WebGPU/GPU.lua b/Core/NativeClient/WebGPU/GPU.lua index 7265329e..92ba3e68 100644 --- a/Core/NativeClient/WebGPU/GPU.lua +++ b/Core/NativeClient/WebGPU/GPU.lua @@ -82,14 +82,11 @@ function GPU:RequestLogicalDevice(adapter, options) maxBindingsPerBindGroup = 2, -- Max. 
allowed binding index maxDynamicUniformBuffersPerPipelineLayout = 1, minStorageBufferOffsetAlignment = 32, - minUniformBufferOffsetAlignment = 32, + minUniformBufferOffsetAlignment = ffi.sizeof("mesh_uniform_t"), }, }), }) - assert(supportedLimits.limits.minUniformBufferOffsetAlignment <= 32, "Dynamic uniform headaches will ensue") - self.minUniformBufferOffsetAlignment = supportedLimits.limits.minUniformBufferOffsetAlignment - local requestedDevice local function onDeviceRequested(status, device, message, userdata) local success = status == ffi.C.WGPURequestDeviceStatus_Success @@ -129,11 +126,4 @@ function GPU:RequestLogicalDevice(adapter, options) return requestedDevice, deviceDescriptor end -function GPU:GetAlignedDynamicUniformBufferStride(uniformStructSizeInBytes) - local step = self.minUniformBufferOffsetAlignment - -- More headaches if the dynamic uniforms (e.g., widget transforms) are smaller than the minimum stride... - local divide_and_ceil = uniformStructSizeInBytes / step + (uniformStructSizeInBytes % step == 0 and 0 or 1) - return step * divide_and_ceil -end - return GPU diff --git a/Core/NativeClient/WebGPU/UniformBuffer.lua b/Core/NativeClient/WebGPU/UniformBuffer.lua index 5b168b4a..82d1cafe 100644 --- a/Core/NativeClient/WebGPU/UniformBuffer.lua +++ b/Core/NativeClient/WebGPU/UniformBuffer.lua @@ -66,8 +66,9 @@ local UniformBuffer = { } water_uniform_t; typedef struct PerMeshData { float translation[2]; // 8 - float padding[6]; // 32 - // Total size must be at least minUniformBufferOffsetAlignment bytes large (with 16 byte alignment) + float padding[62]; // 256 + // Struct size must be aligned with minUniformBufferOffsetAlignment (GPU limit) + // Padding needs to be updated whenever the struct changes! 
} mesh_uniform_t; ]], } From 8d321afce9df786a27c6969d39ccc4484c8202c9 Mon Sep 17 00:00:00 2001 From: RDW Date: Tue, 6 Feb 2024 04:00:52 +0100 Subject: [PATCH 2/2] Client: Ensure the widget instance count is within GPU limits The total number of available widget instances must be limited both by the dynamic buffer size that's pre-allocated and the size of each slot in the dynamic buffer. --- Core/NativeClient/WebGPU/GPU.lua | 3 ++- Core/NativeClient/WebGPU/Pipelines/WidgetDrawingPipeline.lua | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Core/NativeClient/WebGPU/GPU.lua b/Core/NativeClient/WebGPU/GPU.lua index 92ba3e68..49e11a5f 100644 --- a/Core/NativeClient/WebGPU/GPU.lua +++ b/Core/NativeClient/WebGPU/GPU.lua @@ -11,6 +11,7 @@ local GPU = { MAX_VERTEX_COUNT = 200000, -- Should be configurable (later) MAX_TEXTURE_ARRAY_SIZE = 32, MAX_BUFFER_SIZE = 256 * 1024 * 1024, + MAX_UNIFORM_BUFFER_BINDING_SIZE = 65536, } function GPU:CreateInstance() @@ -78,7 +79,7 @@ function GPU:RequestLogicalDevice(adapter, options) maxUniformBuffersPerShaderStage = 1, -- Camera properties (increase for material, soon?) maxSampledTexturesPerShaderStage = GPU.MAX_TEXTURE_ARRAY_SIZE, maxSamplersPerShaderStage = GPU.MAX_TEXTURE_ARRAY_SIZE, - maxUniformBufferBindingSize = 65536, -- DEFAULT + maxUniformBufferBindingSize = GPU.MAX_UNIFORM_BUFFER_BINDING_SIZE, maxBindingsPerBindGroup = 2, -- Max. 
allowed binding index maxDynamicUniformBuffersPerPipelineLayout = 1, minStorageBufferOffsetAlignment = 32, diff --git a/Core/NativeClient/WebGPU/Pipelines/WidgetDrawingPipeline.lua b/Core/NativeClient/WebGPU/Pipelines/WidgetDrawingPipeline.lua index e5654bb7..58576799 100644 --- a/Core/NativeClient/WebGPU/Pipelines/WidgetDrawingPipeline.lua +++ b/Core/NativeClient/WebGPU/Pipelines/WidgetDrawingPipeline.lua @@ -4,6 +4,7 @@ local webgpu = require("webgpu") local Device = require("Core.NativeClient.WebGPU.Device") +local GPU = require("Core.NativeClient.WebGPU.GPU") local UniformBuffer = require("Core.NativeClient.WebGPU.UniformBuffer") local binary_not = bit.bnot @@ -12,7 +13,7 @@ local sizeof = ffi.sizeof local WidgetDrawingPipeline = { WGSL_SHADER_SOURCE_LOCATION = "Core/NativeClient/WebGPU/Shaders/UserInterfaceShader.wgsl", - MAX_WIDGET_COUNT = 2048, -- The default maxUniformBufferBindingSize allows for this many without optimizing further/removing padding/using other buffer types or even hardware instacing for the UI + MAX_WIDGET_COUNT = GPU.MAX_UNIFORM_BUFFER_BINDING_SIZE / sizeof("mesh_uniform_t"), } function WidgetDrawingPipeline:Construct(wgpuDeviceHandle, textureFormatID)