From 4662c7814e959f2ad4653495f71f9a35e5687efd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 17 Nov 2023 17:43:00 +0100 Subject: [PATCH 01/27] frog: Begin sketching out descriptor hoisting idea. --- libs/vkd3d/command.c | 8 ---- libs/vkd3d/vkd3d_private.h | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index a041bcf8f0..d7e67e4c0f 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -2698,14 +2698,6 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, return S_OK; } -struct vkd3d_scratch_allocation -{ - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceAddress va; - void *host_ptr; -}; - static bool d3d12_command_allocator_allocate_scratch_memory(struct d3d12_command_allocator *allocator, enum vkd3d_scratch_pool_kind kind, VkDeviceSize size, VkDeviceSize alignment, uint32_t memory_types, diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 124cd75a35..43dc9d2e3a 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2023,6 +2023,43 @@ struct vkd3d_pipeline_cache_compatibility uint64_t dxbc_blob_hashes[VKD3D_MAX_SHADER_STAGES]; }; +struct d3d12_descriptor_copy_template_entry +{ + uint16_t dst_offset_words; + uint8_t constant_offset; + uint8_t table_index; + uint8_t set_index; + uint8_t count; +}; + +#define VKD3D_MAX_HOISTED_DESCRIPTORS 16 +/* Common case is VS + PS, MS + PS, CS. + * Ignore hoisting outside these (tess/geom/task) to keep memory usage low and number of sets. */ +#define VKD3D_MAX_HOIST_SHADER_STAGES 2 + +struct d3d12_descriptor_copy_template +{ + struct d3d12_descriptor_copy_template_entry entries[VKD3D_MAX_HOIST_SHADER_STAGES * VKD3D_MAX_HOISTED_DESCRIPTORS]; + unsigned int num_entries; + + /* For descriptor hoisting. Each pipeline stage gets its own set. */ + VkDescriptorSetLayout vk_hoist_descriptor_sets[VKD3D_MAX_HOIST_SHADER_STAGES]; + VkPipelineLayout vk_hoist_descriptor_set_layout; + + /* On draw time if table offsets are out of date: + * - Allocate descriptor_allocation_words from d3d12_command_list_descriptor_copy_batch::descriptor_buffer. + * - Allocate num_entries from d3d12_command_list_descriptor_copy_batch::host_buffer. + * - vkCmdBindDescriptorBufferOffsets(offsets = descriptor_offsets + alloc offset, + * first_hoist_set_index, num_hoist_sets). + * - Resolve constant_offset / table_index from command list state into src_offset. + * - Resolve dst_offset_words + alloc offset into dst_offset. + * - Copy set_index / count. */ + VkDeviceSize descriptor_offsets[VKD3D_MAX_HOIST_SHADER_STAGES]; + VkDeviceSize descriptor_allocation_size; + unsigned int num_hoist_sets; + unsigned int first_hoist_set_index; +}; + /* ID3D12PipelineState */ struct d3d12_pipeline_state { @@ -2047,6 +2084,8 @@ struct d3d12_pipeline_state bool pso_is_loaded_from_cached_blob; bool pso_is_fully_dynamic; + struct d3d12_descriptor_copy_template hoist_template; + struct vkd3d_private_store private_store; }; @@ -2313,6 +2352,14 @@ struct vkd3d_scratch_buffer VkDeviceSize offset; }; +struct vkd3d_scratch_allocation +{ + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceAddress va; + void *host_ptr; +}; + #define VKD3D_QUERY_TYPE_INDEX_OCCLUSION (0u) #define VKD3D_QUERY_TYPE_INDEX_PIPELINE_STATISTICS (1u) #define VKD3D_QUERY_TYPE_INDEX_TRANSFORM_FEEDBACK (2u) @@ -2728,6 +2775,39 @@ struct d3d12_command_list_sequence struct d3d12_command_list_iteration_indirect_meta *indirect_meta; }; +#define VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE (64 * 1024) +#define VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES ((64 * 1024) / sizeof(struct d3d12_command_list_descriptor_copy_word)) + +/* Represents a copy where we do: + * for i in range(count): + * store_u32(descriptor_buffer + (dst_offset + i) * sizeof(uint32_t), + * load_u32(base_va[set_index] + (src_offset + i) * sizeof(uint32_t))); + */ +struct d3d12_command_list_descriptor_copy_word +{ + uint32_t src_offset; + uint16_t dst_offset; + uint8_t set_index; + uint8_t count; +}; + +/* A batch is started when application sets descriptor heap. */ +struct d3d12_command_list_descriptor_copy_batch +{ + /* Represents the descriptor heaps. */ + VkDeviceAddress base_va[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + /* Used to detect OOB descriptor copies. Can replace with null descriptor. */ + uint32_t num_descriptors[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + + /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_WORDS * sizeof(uint32_t) worth of descriptors. */ + struct vkd3d_scratch_buffer descriptor_buffer; + /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_NUM_WORD_COPIES worth of d3d12_command_list_descriptor_copy_word. */ + struct vkd3d_scratch_buffer host_buffer; + + unsigned int descriptor_buffer_offset; + unsigned int num_copies; +}; + struct d3d12_command_list { d3d12_command_list_iface ID3D12GraphicsCommandList_iface; @@ -2754,6 +2834,10 @@ struct d3d12_command_list struct d3d12_command_list_sequence cmd; + struct d3d12_command_list_descriptor_copy_batch *copy_batches; + size_t copy_batches_count; + size_t copy_batches_size; + struct d3d12_rtv_desc rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; struct d3d12_rtv_desc dsv; uint32_t dsv_plane_optimal_mask; From d722e64de4abdfced9a339c018fc826326715249 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 20 Nov 2023 13:06:13 +0100 Subject: [PATCH 02/27] frog: Add metadata entries for hoisted descriptors. --- include/vkd3d_shader.h | 16 +++++++++++++++- libs/vkd3d/cache.c | 6 +++--- libs/vkd3d/vkd3d_private.h | 2 -- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/vkd3d_shader.h b/include/vkd3d_shader.h index 9066814abf..5351ae3b3d 100644 --- a/include/vkd3d_shader.h +++ b/include/vkd3d_shader.h @@ -77,6 +77,16 @@ enum vkd3d_shader_meta_flags VKD3D_SHADER_META_FLAG_FORCE_COMPUTE_BARRIER_AFTER_DISPATCH = 1 << 16, }; +/* Tweaked to make the meta struct a nice and round 64 bytes. */ +#define VKD3D_MAX_HOISTED_DESCRIPTORS 15 + +struct vkd3d_shader_meta_hoisted_desc +{ + uint16_t table_index : 6; /* API limit */ + uint16_t constant_offset : 7; /* 128 offset from table should be more than enough ... */ + uint16_t vk_descriptor_type : 3; /* The standard descriptor types we care about fit into 3 bits as-is. */ +}; + struct vkd3d_shader_meta { vkd3d_shader_hash_t hash; @@ -84,8 +94,10 @@ struct vkd3d_shader_meta unsigned int patch_vertex_count; /* Relevant for HS. May be 0, in which case the patch vertex count is not known. */ unsigned int cs_required_wave_size; /* If non-zero, force a specific CS subgroup size. */ uint32_t flags; /* vkd3d_shader_meta_flags */ + struct vkd3d_shader_meta_hoisted_desc hoist_desc[VKD3D_MAX_HOISTED_DESCRIPTORS]; + uint16_t num_hoisted_descriptors; }; -STATIC_ASSERT(sizeof(struct vkd3d_shader_meta) == 32); +STATIC_ASSERT(sizeof(struct vkd3d_shader_meta) == 64); struct vkd3d_shader_code { @@ -220,6 +232,7 @@ enum vkd3d_shader_interface_flag VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER = 0x00000010u, /* In this model, use descriptor_size_cbv_srv_uav as array stride for raw VA buffer. */ VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER = 0x00000020u, + VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS = 0x00000040u, }; struct vkd3d_shader_stage_io_entry @@ -278,6 +291,7 @@ struct vkd3d_shader_interface_info /* Used for either VKD3D_SHADER_INTERFACE_RAW_VA_ALIAS_DESCRIPTOR_BUFFER or local root signatures. */ uint32_t descriptor_size_cbv_srv_uav; uint32_t descriptor_size_sampler; + uint32_t hoist_descriptor_set_index; }; struct vkd3d_shader_descriptor_table diff --git a/libs/vkd3d/cache.c b/libs/vkd3d/cache.c index 4175ed900d..a8c1a75d3f 100644 --- a/libs/vkd3d/cache.c +++ b/libs/vkd3d/cache.c @@ -181,7 +181,7 @@ VkResult vkd3d_create_pipeline_cache(struct d3d12_device *device, return VK_CALL(vkCreatePipelineCache(device->vk_device, &info, NULL, cache)); } -#define VKD3D_CACHE_BLOB_VERSION MAKE_MAGIC('V','K','B',3) +#define VKD3D_CACHE_BLOB_VERSION MAKE_MAGIC('V','K','B',4) enum vkd3d_pipeline_blob_chunk_type { @@ -1314,8 +1314,8 @@ struct vkd3d_serialized_pipeline_toc_entry }; STATIC_ASSERT(sizeof(struct vkd3d_serialized_pipeline_toc_entry) == 16); -#define VKD3D_PIPELINE_LIBRARY_VERSION_TOC MAKE_MAGIC('V','K','L',4) -#define VKD3D_PIPELINE_LIBRARY_VERSION_STREAM MAKE_MAGIC('V','K','S',4) +#define VKD3D_PIPELINE_LIBRARY_VERSION_TOC MAKE_MAGIC('V','K','L',5) +#define VKD3D_PIPELINE_LIBRARY_VERSION_STREAM MAKE_MAGIC('V','K','S',5) struct vkd3d_serialized_pipeline_library_toc { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 43dc9d2e3a..fe8b4d38fe 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1663,7 +1663,6 @@ struct d3d12_bind_point_layout VkPushConstantRange push_constant_range; }; -#define VKD3D_MAX_HOISTED_DESCRIPTORS 16 struct vkd3d_descriptor_hoist_desc { uint32_t table_index; @@ -2032,7 +2031,6 @@ struct d3d12_descriptor_copy_template_entry uint8_t count; }; -#define VKD3D_MAX_HOISTED_DESCRIPTORS 16 /* Common case is VS + PS, MS + PS, CS. * Ignore hoisting outside these (tess/geom/task) to keep memory usage low and number of sets. */ #define VKD3D_MAX_HOIST_SHADER_STAGES 2 From 2e28d92132839ab1ad5d55c1f2ed101c71bc3287 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 20 Nov 2023 13:28:04 +0100 Subject: [PATCH 03/27] Tear down hoist layouts. --- libs/vkd3d/state.c | 5 +++++ libs/vkd3d/vkd3d_private.h | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 42aa68f75b..b9c921c92f 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2248,6 +2248,7 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) struct d3d12_device *device = state->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ULONG refcount = InterlockedDecrement(&state->internal_refcount); + unsigned int i; if (!refcount) { @@ -2265,6 +2266,10 @@ void d3d12_pipeline_state_dec_ref(struct d3d12_pipeline_state *state) if (state->root_signature) d3d12_root_signature_dec_ref(state->root_signature); + for (i = 0; i < ARRAY_SIZE(state->hoist_template.vk_hoist_descriptor_set_layouts); i++) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->hoist_template.vk_hoist_descriptor_set_layouts[i], NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->hoist_template.vk_hoist_descriptor_layout, NULL)); + if (state->pipeline_type == VKD3D_PIPELINE_TYPE_GRAPHICS || state->pipeline_type == VKD3D_PIPELINE_TYPE_MESH_GRAPHICS) d3d12_pipeline_state_free_cached_desc(&state->graphics.cached_desc); rwlock_destroy(&state->lock); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index fe8b4d38fe..3536d0a601 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2041,8 +2041,8 @@ struct d3d12_descriptor_copy_template unsigned int num_entries; /* For descriptor hoisting. Each pipeline stage gets its own set. */ - VkDescriptorSetLayout vk_hoist_descriptor_sets[VKD3D_MAX_HOIST_SHADER_STAGES]; - VkPipelineLayout vk_hoist_descriptor_set_layout; + VkDescriptorSetLayout vk_hoist_descriptor_set_layouts[VKD3D_MAX_HOIST_SHADER_STAGES]; + VkPipelineLayout vk_hoist_descriptor_layout; /* On draw time if table offsets are out of date: * - Allocate descriptor_allocation_words from d3d12_command_list_descriptor_copy_batch::descriptor_buffer. From 6ca089e9dae5a4716d64ff6e9cc4155a0b29880d Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 12:25:41 +0100 Subject: [PATCH 04/27] Add helper to create augmented pipeline layout. --- libs/vkd3d/state.c | 76 ++++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 10 +++++ 2 files changed, 86 insertions(+) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index b9c921c92f..b645666709 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1518,6 +1518,68 @@ HRESULT d3d12_root_signature_create_local_static_samplers_layout(struct d3d12_ro return S_OK; } +static HRESULT d3d12_root_signature_create_hoisted_descriptor_set_layout( + const struct d3d12_root_signature *root_signature, + VkShaderStageFlagBits stage, const struct vkd3d_shader_meta_hoisted_desc *descs, unsigned int num_descs, + VkDescriptorSetLayout *vk_set_layout) +{ + VkDescriptorSetLayoutBinding bindings[VKD3D_MAX_HOISTED_DESCRIPTORS]; + unsigned int i; + + for (i = 0; i < num_descs; i++) + { + bindings[i].descriptorType = (VkDescriptorType)descs[i].vk_descriptor_type; + bindings[i].descriptorCount = 1; + bindings[i].binding = i; + bindings[i].pImmutableSamplers = NULL; + bindings[i].stageFlags = stage; + } + + return vkd3d_create_descriptor_set_layout(root_signature->device, 0, + num_descs, bindings, VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + vk_set_layout); +} + +HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( + const struct d3d12_root_signature *root_signature, const struct d3d12_bind_point_layout *layout, + VkShaderStageFlagBits first_stage, const struct vkd3d_shader_meta_hoisted_desc *first_set, unsigned int first_set_count, + VkShaderStageFlagBits second_stage, const struct vkd3d_shader_meta_hoisted_desc *second_set, unsigned int second_set_count, + struct d3d12_descriptor_copy_template *copy_template) +{ + VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS + VKD3D_MAX_HOIST_SHADER_STAGES]; + unsigned int num_sets; + unsigned int i; + HRESULT hr; + + if (first_set_count == 0 && second_set_count == 0) + return S_OK; + + /* Have to create the gap descriptor set layout. */ + if (FAILED(hr = d3d12_root_signature_create_hoisted_descriptor_set_layout(root_signature, + first_stage, first_set, first_set_count, ©_template->vk_hoist_descriptor_set_layouts[0]))) + return hr; + + if (second_set_count && FAILED(hr = d3d12_root_signature_create_hoisted_descriptor_set_layout(root_signature, + second_stage, second_set, second_set_count, ©_template->vk_hoist_descriptor_set_layouts[1]))) + return hr; + + for (i = 0; i < layout->num_set_layouts; i++) + set_layouts[i] = root_signature->set_layouts[i]; + + num_sets = layout->num_set_layouts; + set_layouts[num_sets++] = copy_template->vk_hoist_descriptor_set_layouts[0]; + if (second_set_count) + set_layouts[num_sets++] = copy_template->vk_hoist_descriptor_set_layouts[1]; + + if (FAILED(hr = vkd3d_create_pipeline_layout(root_signature->device, + num_sets, set_layouts, + layout->push_constant_range.stageFlags ? 1 : 0, &layout->push_constant_range, + ©_template->vk_hoist_descriptor_layout))) + return hr; + + return S_OK; +} + static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signature, struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC2 *desc) { @@ -2440,6 +2502,8 @@ static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_sta struct vkd3d_shader_interface_info *shader_interface) { const struct d3d12_root_signature *root_signature = state->root_signature; + const struct d3d12_bind_point_layout *layout; + memset(shader_interface, 0, sizeof(*shader_interface)); shader_interface->flags = d3d12_root_signature_get_shader_interface_flags(root_signature, state->pipeline_type); shader_interface->min_ssbo_alignment = d3d12_device_get_ssbo_alignment(device); @@ -2460,6 +2524,18 @@ static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_sta shader_interface->descriptor_size_sampler = d3d12_device_get_descriptor_handle_increment_size( device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + /* Ignore tess, geom and task. Only bother with the common cases to keep number of sets down. */ + if (stage == VK_SHADER_STAGE_VERTEX_BIT || stage == VK_SHADER_STAGE_MESH_BIT_EXT || + stage == VK_SHADER_STAGE_FRAGMENT_BIT || stage == VK_SHADER_STAGE_COMPUTE_BIT) + { + layout = d3d12_root_signature_get_layout(root_signature, state->pipeline_type); + shader_interface->flags |= VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS; + shader_interface->hoist_descriptor_set_index = layout->num_set_layouts; + /* Make the set only depend on the root signature. */ + if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) + shader_interface->hoist_descriptor_set_index++; + } + if (stage == VK_SHADER_STAGE_MESH_BIT_EXT) { shader_interface->stage_output_map = &state->graphics.cached_desc.stage_io_map_ms_ps; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 3536d0a601..991f99e6b8 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1757,6 +1757,16 @@ unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_ enum vkd3d_pipeline_type pipeline_type); HRESULT d3d12_root_signature_create_local_static_samplers_layout(struct d3d12_root_signature *root_signature, VkDescriptorSetLayout vk_set_layout, VkPipelineLayout *vk_pipeline_layout); + +struct d3d12_descriptor_copy_template; +HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( + const struct d3d12_root_signature *root_signature, const struct d3d12_bind_point_layout *layout, + VkShaderStageFlagBits first_stage, const struct vkd3d_shader_meta_hoisted_desc *first_set, + unsigned int first_set_count, + VkShaderStageFlagBits second_stage, const struct vkd3d_shader_meta_hoisted_desc *second_set, + unsigned int second_set_count, + struct d3d12_descriptor_copy_template *copy_template); + HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts, unsigned int push_constant_count, const VkPushConstantRange *push_constants, From dad54a2c9026d677a322284ace668204838e8704 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 13:30:15 +0100 Subject: [PATCH 05/27] Create hoisted pipeline layout. --- libs/vkd3d/state.c | 176 +++++++++++++++++++++++++++---------- libs/vkd3d/vkd3d_private.h | 1 - 2 files changed, 129 insertions(+), 48 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index b645666709..b0e0bbf26c 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1518,13 +1518,24 @@ HRESULT d3d12_root_signature_create_local_static_samplers_layout(struct d3d12_ro return S_OK; } +static uint32_t vkd3d_get_descriptor_size_for_type(struct d3d12_device *device, VkDescriptorType vk_descriptor_type); + static HRESULT d3d12_root_signature_create_hoisted_descriptor_set_layout( const struct d3d12_root_signature *root_signature, VkShaderStageFlagBits stage, const struct vkd3d_shader_meta_hoisted_desc *descs, unsigned int num_descs, - VkDescriptorSetLayout *vk_set_layout) + struct d3d12_descriptor_copy_template *copy_template) { + struct d3d12_device *device = root_signature->device; + VkDescriptorSetLayoutBinding bindings[VKD3D_MAX_HOISTED_DESCRIPTORS]; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_descriptor_copy_template_entry *entry; + struct vkd3d_shader_descriptor_binding binding; + VkDescriptorSetLayout vk_set_layout; + VkDeviceSize desc_offset; + uint32_t bindless_flags; unsigned int i; + HRESULT hr; for (i = 0; i < num_descs; i++) { @@ -1535,9 +1546,69 @@ static HRESULT d3d12_root_signature_create_hoisted_descriptor_set_layout( bindings[i].stageFlags = stage; } - return vkd3d_create_descriptor_set_layout(root_signature->device, 0, + if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, 0, num_descs, bindings, VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, - vk_set_layout); + &vk_set_layout))) + return hr; + + copy_template->descriptor_allocation_size = align( + copy_template->descriptor_allocation_size, + device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment); + copy_template->descriptor_offsets[copy_template->num_hoist_sets] = copy_template->descriptor_allocation_size; + VK_CALL(vkGetDescriptorSetLayoutSizeEXT(device->vk_device, vk_set_layout, &desc_offset)); + copy_template->descriptor_allocation_size += desc_offset; + + for (i = 0; i < num_descs; i++) + { + entry = ©_template->entries[copy_template->num_entries++]; + entry->constant_offset = descs[i].constant_offset; + entry->table_index = descs[i].table_index; + + switch (bindings[i].descriptorType) + { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + bindless_flags = VKD3D_BINDLESS_SET_IMAGE | VKD3D_BINDLESS_SET_SRV; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + bindless_flags = VKD3D_BINDLESS_SET_IMAGE | VKD3D_BINDLESS_SET_UAV; + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + bindless_flags = VKD3D_BINDLESS_SET_BUFFER | VKD3D_BINDLESS_SET_SRV; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + bindless_flags = VKD3D_BINDLESS_SET_BUFFER | VKD3D_BINDLESS_SET_UAV; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + bindless_flags = VKD3D_BINDLESS_SET_SRV | VKD3D_BINDLESS_SET_UAV | VKD3D_BINDLESS_SET_RAW_SSBO; + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + bindless_flags = VKD3D_BINDLESS_SET_CBV; + break; + + case VK_DESCRIPTOR_TYPE_SAMPLER: + bindless_flags = VKD3D_BINDLESS_SET_SAMPLER; + break; + + default: + WARN("Unknown descriptor type %u.\n", bindings[i].descriptorType); + return E_INVALIDARG; + } + + vkd3d_bindless_state_find_binding(&device->bindless_state, bindless_flags, &binding); + entry->set_index = binding.set; + entry->count = vkd3d_get_descriptor_size_for_type(root_signature->device, bindings[i].descriptorType) / sizeof(uint32_t); + + VK_CALL(vkGetDescriptorSetLayoutBindingOffsetEXT(device->vk_device, vk_set_layout, i, &desc_offset)); + entry->dst_offset_words = (copy_template->descriptor_offsets[copy_template->num_hoist_sets] + desc_offset) / sizeof(uint32_t); + } + + copy_template->vk_hoist_descriptor_set_layouts[copy_template->num_hoist_sets++] = vk_set_layout; + return S_OK; } HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( @@ -1554,22 +1625,26 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( if (first_set_count == 0 && second_set_count == 0) return S_OK; + copy_template->first_hoist_set_index = layout->num_set_layouts; + /* Have to create the gap descriptor set layout. */ if (FAILED(hr = d3d12_root_signature_create_hoisted_descriptor_set_layout(root_signature, - first_stage, first_set, first_set_count, ©_template->vk_hoist_descriptor_set_layouts[0]))) + first_stage, first_set, first_set_count, + copy_template))) return hr; if (second_set_count && FAILED(hr = d3d12_root_signature_create_hoisted_descriptor_set_layout(root_signature, - second_stage, second_set, second_set_count, ©_template->vk_hoist_descriptor_set_layouts[1]))) + second_stage, second_set, second_set_count, + copy_template))) return hr; for (i = 0; i < layout->num_set_layouts; i++) set_layouts[i] = root_signature->set_layouts[i]; num_sets = layout->num_set_layouts; - set_layouts[num_sets++] = copy_template->vk_hoist_descriptor_set_layouts[0]; - if (second_set_count) - set_layouts[num_sets++] = copy_template->vk_hoist_descriptor_set_layouts[1]; + + for (i = 0; i < copy_template->num_hoist_sets; i++) + set_layouts[num_sets++] = copy_template->vk_hoist_descriptor_set_layouts[i]; if (FAILED(hr = vkd3d_create_pipeline_layout(root_signature->device, num_sets, set_layouts, @@ -2525,8 +2600,8 @@ static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_sta device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); /* Ignore tess, geom and task. Only bother with the common cases to keep number of sets down. */ - if (stage == VK_SHADER_STAGE_VERTEX_BIT || stage == VK_SHADER_STAGE_MESH_BIT_EXT || - stage == VK_SHADER_STAGE_FRAGMENT_BIT || stage == VK_SHADER_STAGE_COMPUTE_BIT) + if (/*stage == VK_SHADER_STAGE_VERTEX_BIT || stage == VK_SHADER_STAGE_MESH_BIT_EXT || + stage == VK_SHADER_STAGE_FRAGMENT_BIT || */stage == VK_SHADER_STAGE_COMPUTE_BIT) { layout = d3d12_root_signature_get_layout(root_signature, state->pipeline_type); shader_interface->flags |= VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS; @@ -2837,6 +2912,42 @@ static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCrea } } +static HRESULT d3d12_pipeline_state_create_hoisted_pipeline_layout(struct d3d12_pipeline_state *state) +{ + const struct vkd3d_shader_meta *metas[2] = { NULL }; + VkShaderStageFlagBits stages[2]; + unsigned int i; + + if (state->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) + { + metas[0] = &state->compute.code.meta; + stages[0] = VK_SHADER_STAGE_COMPUTE_BIT; + } + else + { + for (i = 0; i < state->graphics.stage_count; i++) + { + if (state->graphics.stages[i].stage == VK_SHADER_STAGE_VERTEX_BIT || + state->graphics.stages[i].stage == VK_SHADER_STAGE_MESH_BIT_EXT) + { + stages[0] = state->graphics.stages[i].stage; + metas[0] = &state->graphics.code[i].meta; + } + else if (state->graphics.stages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT) + { + stages[1] = state->graphics.stages[i].stage; + metas[1] = &state->graphics.code[i].meta; + } + } + } + + return d3d12_root_signature_create_hoisted_descriptor_layout(state->root_signature, + d3d12_root_signature_get_layout(state->root_signature, state->pipeline_type), + stages[0], metas[0] ? metas[0]->hoist_desc : NULL, metas[0] ? metas[0]->num_hoisted_descriptors : 0, + stages[1], metas[1] ? metas[1]->hoist_desc : NULL, metas[1] ? metas[1]->num_hoisted_descriptors : 0, + &state->hoist_template); +} + static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, struct d3d12_device *device, const D3D12_SHADER_BYTECODE *code) @@ -2889,7 +3000,14 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_pipeline_state *state, &state->compute.identifier)); } - pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; + if (FAILED(hr = d3d12_pipeline_state_create_hoisted_pipeline_layout(state))) + return hr; + + if (state->hoist_template.vk_hoist_descriptor_layout) + pipeline_info.layout = state->hoist_template.vk_hoist_descriptor_layout; + else + pipeline_info.layout = state->root_signature->compute.vk_pipeline_layout; + pipeline_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_info.basePipelineIndex = -1; @@ -6655,42 +6773,6 @@ bool vkd3d_bindless_state_find_binding(const struct vkd3d_bindless_state *bindle return false; } -struct vkd3d_descriptor_binding vkd3d_bindless_state_find_set(const struct vkd3d_bindless_state *bindless_state, uint32_t flags) -{ - struct vkd3d_descriptor_binding binding; - D3D12_DESCRIPTOR_HEAP_TYPE heap_type; - unsigned int i, set_index = 0; - - heap_type = flags & VKD3D_BINDLESS_SET_SAMPLER - ? D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER - : D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - - for (i = 0; i < bindless_state->set_count; i++) - { - const struct vkd3d_bindless_set_info *set_info = &bindless_state->set_info[i]; - - if (set_info->heap_type == heap_type) - { - if ((set_info->flags & flags) == flags) - { - binding.set = set_index; - binding.binding = set_info->binding_index; - - if (flags & VKD3D_BINDLESS_SET_EXTRA_MASK) - binding.binding = vkd3d_bindless_state_get_extra_binding_index(flags, set_info->flags); - return binding; - } - - set_index++; - } - } - - ERR("No set found for flags %#x.", flags); - binding.set = 0; - binding.binding = 0; - return binding; -} - uint32_t vkd3d_bindless_state_find_set_info_index(const struct vkd3d_bindless_state *bindless_state, uint32_t flags) { D3D12_DESCRIPTOR_HEAP_TYPE heap_type; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 991f99e6b8..9c0cf99502 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -3736,7 +3736,6 @@ void vkd3d_bindless_state_cleanup(struct vkd3d_bindless_state *bindless_state, struct d3d12_device *device); bool vkd3d_bindless_state_find_binding(const struct vkd3d_bindless_state *bindless_state, uint32_t flags, struct vkd3d_shader_descriptor_binding *binding); -struct vkd3d_descriptor_binding vkd3d_bindless_state_find_set(const struct vkd3d_bindless_state *bindless_state, uint32_t flags); uint32_t vkd3d_bindless_state_find_set_info_index(const struct vkd3d_bindless_state *bindless_state, uint32_t flags); From 605fa6b2d0fa713112aef03765c270f8d38838c4 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 14:09:32 +0100 Subject: [PATCH 06/27] Hook up hoisting logic for DXIL. --- include/vkd3d_shader.h | 1 + libs/vkd3d-shader/dxil.c | 70 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/include/vkd3d_shader.h b/include/vkd3d_shader.h index 5351ae3b3d..9481012d6a 100644 --- a/include/vkd3d_shader.h +++ b/include/vkd3d_shader.h @@ -79,6 +79,7 @@ enum vkd3d_shader_meta_flags /* Tweaked to make the meta struct a nice and round 64 bytes. */ #define VKD3D_MAX_HOISTED_DESCRIPTORS 15 +#define VKD3D_MAX_HOISTED_DESCRIPTOR_CONSTANT_OFFSET (1 << 7) struct vkd3d_shader_meta_hoisted_desc { diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index dd2f44234a..16b2b48b7c 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -123,6 +123,7 @@ struct vkd3d_dxil_remap_userdata const struct vkd3d_shader_interface_info *shader_interface_info; const struct vkd3d_shader_interface_local_info *shader_interface_local_info; unsigned int num_root_descriptors; + struct vkd3d_shader_meta *meta; }; struct vkd3d_dxil_remap_info @@ -253,6 +254,32 @@ static dxil_spv_bool dxil_remap(const struct vkd3d_dxil_remap_userdata *remap, return DXIL_SPV_TRUE; } +static void dxil_remap_check_hoist( + const struct vkd3d_shader_interface_info *shader_interface_info, + const struct vkd3d_dxil_remap_userdata *remap, const dxil_spv_d3d_binding *d3d_binding, + dxil_spv_vulkan_binding *vk_binding, + VkDescriptorType vk_descriptor_type) +{ + struct vkd3d_shader_meta_hoisted_desc *hoisted; + + if ((shader_interface_info->flags & VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS) && + vk_binding->bindless.use_heap == DXIL_SPV_TRUE && + remap->meta->num_hoisted_descriptors < VKD3D_MAX_HOISTED_DESCRIPTORS && + d3d_binding->range_size == 1 && + vk_binding->bindless.heap_root_offset < VKD3D_MAX_HOISTED_DESCRIPTOR_CONSTANT_OFFSET) + { + hoisted = &remap->meta->hoist_desc[remap->meta->num_hoisted_descriptors]; + hoisted->table_index = vk_binding->root_constant_index - + shader_interface_info->descriptor_tables.offset / sizeof(uint32_t); + hoisted->constant_offset = vk_binding->bindless.heap_root_offset; + hoisted->vk_descriptor_type = vk_descriptor_type; + vk_binding->bindless.use_heap = DXIL_SPV_FALSE; + vk_binding->set = shader_interface_info->hoist_descriptor_set_index; + vk_binding->binding = remap->meta->num_hoisted_descriptors; + remap->meta->num_hoisted_descriptors++; + } +} + static dxil_spv_bool dxil_srv_remap(void *userdata, const dxil_spv_d3d_binding *d3d_binding, dxil_spv_srv_vulkan_binding *vk_binding) { @@ -275,6 +302,12 @@ static dxil_spv_bool dxil_srv_remap(void *userdata, const dxil_spv_d3d_binding * vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; } + else + { + dxil_remap_check_hoist(shader_interface_info, remap, d3d_binding, &vk_binding->buffer_binding, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + } + return DXIL_SPV_TRUE; } else @@ -285,6 +318,11 @@ static dxil_spv_bool dxil_srv_remap(void *userdata, const dxil_spv_d3d_binding * vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; } + else + { + dxil_remap_check_hoist(shader_interface_info, remap, d3d_binding, &vk_binding->buffer_binding, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + } } return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, @@ -295,8 +333,13 @@ static dxil_spv_bool dxil_sampler_remap(void *userdata, const dxil_spv_d3d_bindi dxil_spv_vulkan_binding *vk_binding) { const struct vkd3d_dxil_remap_userdata *remap = userdata; - return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, - d3d_binding, vk_binding, VKD3D_SHADER_BINDING_FLAG_IMAGE); + + if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, + d3d_binding, vk_binding, VKD3D_SHADER_BINDING_FLAG_IMAGE)) + return DXIL_SPV_FALSE; + + dxil_remap_check_hoist(remap->shader_interface_info, remap, d3d_binding, vk_binding, VK_DESCRIPTOR_TYPE_SAMPLER); + return DXIL_SPV_TRUE; } static dxil_spv_bool dxil_input_remap(void *userdata, const dxil_spv_d3d_vertex_input *d3d_input, @@ -421,6 +464,12 @@ static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_bindi vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; } + else if (!d3d_binding->has_counter) + { + dxil_remap_check_hoist(shader_interface_info, remap, + &d3d_binding->d3d_binding, &vk_binding->buffer_binding, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + } } else if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &d3d_binding->d3d_binding, &vk_binding->buffer_binding, resource_flags)) @@ -453,6 +502,12 @@ static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_bindi vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; } + else if (!d3d_binding->has_counter) + { + dxil_remap_check_hoist(shader_interface_info, remap, + &d3d_binding->d3d_binding, &vk_binding->buffer_binding, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + } } if (d3d_binding->has_counter) @@ -499,9 +554,14 @@ static dxil_spv_bool dxil_cbv_remap(void *userdata, const dxil_spv_d3d_binding * } vk_binding->push_constant = DXIL_SPV_FALSE; - return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, + if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, d3d_binding, &vk_binding->vulkan.uniform_binding, - VKD3D_SHADER_BINDING_FLAG_BUFFER); + VKD3D_SHADER_BINDING_FLAG_BUFFER)) + return DXIL_SPV_FALSE; + + dxil_remap_check_hoist(remap->shader_interface_info, remap, d3d_binding, + &vk_binding->vulkan.uniform_binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + return DXIL_SPV_TRUE; } static void vkd3d_dxil_log_callback(void *userdata, dxil_spv_log_level level, const char *msg) @@ -1011,6 +1071,7 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc, remap_userdata.shader_interface_info = shader_interface_info; remap_userdata.shader_interface_local_info = NULL; remap_userdata.num_root_descriptors = num_root_descriptors; + remap_userdata.meta = &spirv->meta; dxil_spv_converter_set_root_constant_word_count(converter, root_constant_words); dxil_spv_converter_set_root_descriptor_count(converter, num_root_descriptors); @@ -1564,6 +1625,7 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil, remap_userdata.shader_interface_info = shader_interface_info; remap_userdata.shader_interface_local_info = shader_interface_local_info; remap_userdata.num_root_descriptors = num_root_descriptors; + remap_userdata.meta = &spirv->meta; dxil_spv_converter_set_root_constant_word_count(converter, root_constant_words); dxil_spv_converter_set_root_descriptor_count(converter, num_root_descriptors); From f7415f22d9bfc3a39f6698681c1ee54a8528d37a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 14:35:06 +0100 Subject: [PATCH 07/27] Fix some hoisting bugs. --- libs/vkd3d-shader/dxil.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index 16b2b48b7c..be52192d3e 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -318,15 +318,20 @@ static dxil_spv_bool dxil_srv_remap(void *userdata, const dxil_spv_d3d_binding * vk_binding->offset_binding.set = shader_interface_info->offset_buffer_binding->set; vk_binding->offset_binding.binding = shader_interface_info->offset_buffer_binding->binding; } - else - { - dxil_remap_check_hoist(shader_interface_info, remap, d3d_binding, &vk_binding->buffer_binding, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); - } } - return dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, - d3d_binding, &vk_binding->buffer_binding, resource_flags); + if (!dxil_remap(remap, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, + d3d_binding, &vk_binding->buffer_binding, resource_flags)) + return DXIL_SPV_FALSE; + + if (!(shader_interface_info->flags & VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER)) + { + dxil_remap_check_hoist(shader_interface_info, remap, d3d_binding, &vk_binding->buffer_binding, + d3d_binding->kind == DXIL_SPV_RESOURCE_KIND_TYPED_BUFFER ? + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); + } + + return DXIL_SPV_TRUE; } static dxil_spv_bool dxil_sampler_remap(void *userdata, const dxil_spv_d3d_binding *d3d_binding, @@ -506,7 +511,8 @@ static dxil_spv_bool dxil_uav_remap(void *userdata, const dxil_spv_uav_d3d_bindi { dxil_remap_check_hoist(shader_interface_info, remap, &d3d_binding->d3d_binding, &vk_binding->buffer_binding, - VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + d3d_binding->d3d_binding.kind == DXIL_SPV_RESOURCE_KIND_TYPED_BUFFER ? + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); } } From e2178771444acf39e90a86cd4d215da813c63642 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 16:08:14 +0100 Subject: [PATCH 08/27] Bind scratch descriptor buffer. --- libs/vkd3d/command.c | 89 ++++++++++++++++++++++++++++++++------ libs/vkd3d/device.c | 26 +++++++++++ libs/vkd3d/heap.c | 1 + libs/vkd3d/memory.c | 1 + libs/vkd3d/resource.c | 1 + libs/vkd3d/vkd3d_private.h | 27 ++++++++---- 6 files changed, 122 insertions(+), 23 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d7e67e4c0f..3d0f45c5ae 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5398,8 +5398,10 @@ static void d3d12_command_list_init_default_descriptor_buffers(struct d3d12_comm { if (d3d12_device_uses_descriptor_buffers(list->device)) { - list->descriptor_heap.buffers.heap_va_resource = list->device->global_descriptor_buffer.resource.va; - list->descriptor_heap.buffers.heap_va_sampler = list->device->global_descriptor_buffer.sampler.va; + list->descriptor_heap.buffers.heap_va[0] = list->device->global_descriptor_buffer.resource.va; + list->descriptor_heap.buffers.heap_va[1] = list->device->global_descriptor_buffer.sampler.va; + list->descriptor_heap.buffers.mapped[0] = NULL; + list->descriptor_heap.buffers.mapped[1] = NULL; list->descriptor_heap.buffers.vk_buffer_resource = list->device->global_descriptor_buffer.resource.vk_buffer; list->descriptor_heap.buffers.heap_dirty = true; } @@ -5927,7 +5929,10 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDescriptorBufferBindingPushDescriptorBufferHandleEXT buffer_handle; - VkDescriptorBufferBindingInfoEXT global_buffers[2]; + struct d3d12_command_list_descriptor_copy_batch *batch; + VkDescriptorBufferBindingInfoEXT global_buffers[3]; + uint32_t num_global_buffers; + unsigned int i; if (d3d12_device_uses_descriptor_buffers(list->device) && list->descriptor_heap.buffers.heap_dirty) @@ -5935,7 +5940,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li global_buffers[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; global_buffers[0].pNext = NULL; global_buffers[0].usage = list->device->global_descriptor_buffer.resource.usage; - global_buffers[0].address = list->descriptor_heap.buffers.heap_va_resource; + global_buffers[0].address = list->descriptor_heap.buffers.heap_va[0]; if (global_buffers[0].usage & VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT) { @@ -5948,10 +5953,53 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li global_buffers[1].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; global_buffers[1].pNext = NULL; global_buffers[1].usage = list->device->global_descriptor_buffer.sampler.usage; - global_buffers[1].address = list->descriptor_heap.buffers.heap_va_sampler; + global_buffers[1].address = list->descriptor_heap.buffers.heap_va[1]; + + num_global_buffers = 2; + + /* if (using hoisting) */ + { + /* If we're doing hoisting, allocate a new chunk now. */ + vkd3d_array_reserve((void **)&list->copy_batches, &list->copy_batches_size, + list->copy_batches_count + 1, sizeof(*list->copy_batches)); + batch = &list->copy_batches[list->copy_batches_count++]; + + d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_DESCRIPTOR_BUFFER, + VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE, + list->device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment, + ~0u, &batch->descriptor_buffer); + + d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, + VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES, 64, + ~0u, &batch->host_buffer); + + batch->descriptor_buffer_offset = 0; + batch->num_copies = 0; + + for (i = 0; i < list->device->bindless_state.set_count; i++) + { + unsigned int buffer_index = list->device->bindless_state.vk_descriptor_buffer_indices[i]; + /*batch->heaps[i].base_va = global_buffers[buffer_index].address + list->descriptor_heap.buffers.vk_payload_offsets[i];*/ + /* HACK: Use CPU side copy for now. */ + batch->heaps[i].base_va = (VkDeviceAddress) + ((uint8_t *)list->descriptor_heap.buffers.mapped[buffer_index] + list->descriptor_heap.buffers.vk_payload_offsets[i]); + batch->heaps[i].num_descriptors = list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[buffer_index]; + batch->heaps[i].stride_words = list->descriptor_heap.buffers.vk_descriptor_stride_words[i]; + } + + global_buffers[num_global_buffers].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + global_buffers[num_global_buffers].pNext = NULL; + global_buffers[num_global_buffers].usage = + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; + global_buffers[num_global_buffers].address = batch->descriptor_buffer.va; + num_global_buffers++; + } VK_CALL(vkCmdBindDescriptorBuffersEXT(list->cmd.vk_command_buffer, - ARRAY_SIZE(global_buffers), global_buffers)); + num_global_buffers, global_buffers)); list->descriptor_heap.buffers.heap_dirty = false; } @@ -5977,7 +6025,7 @@ static void d3d12_command_list_update_descriptor_heaps(struct d3d12_command_list VK_CALL(vkCmdSetDescriptorBufferOffsetsEXT(list->cmd.vk_command_buffer, vk_bind_point, layout, 0, bindless_state->set_count, bindless_state->vk_descriptor_buffer_indices, - list->descriptor_heap.buffers.vk_offsets)); + list->descriptor_heap.buffers.vk_bind_offsets)); bindings->descriptor_heap_dirty_mask = 0; } } @@ -9517,8 +9565,8 @@ static void d3d12_command_list_set_descriptor_heaps_buffers(struct d3d12_command struct d3d12_desc_split d; unsigned int i, j; - current_resource_va = list->descriptor_heap.buffers.heap_va_resource; - current_sampler_va = list->descriptor_heap.buffers.heap_va_sampler; + current_resource_va = list->descriptor_heap.buffers.heap_va[0]; + current_sampler_va = list->descriptor_heap.buffers.heap_va[1]; for (i = 0; i < heap_count; i++) { @@ -9530,8 +9578,10 @@ static void d3d12_command_list_set_descriptor_heaps_buffers(struct d3d12_command if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { - list->descriptor_heap.buffers.heap_va_resource = heap->descriptor_buffer.va; + list->descriptor_heap.buffers.heap_va[0] = heap->descriptor_buffer.va; + list->descriptor_heap.buffers.mapped[0] = heap->descriptor_buffer.host_allocation; list->descriptor_heap.buffers.vk_buffer_resource = heap->descriptor_buffer.vk_buffer; + list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[0] = heap->desc.NumDescriptors; if (!d3d12_device_use_embedded_mutable_descriptors(list->device)) { @@ -9542,16 +9592,27 @@ static void d3d12_command_list_set_descriptor_heaps_buffers(struct d3d12_command } else if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) { - list->descriptor_heap.buffers.heap_va_sampler = heap->descriptor_buffer.va; + list->descriptor_heap.buffers.heap_va[1] = heap->descriptor_buffer.va; + list->descriptor_heap.buffers.mapped[1] = heap->descriptor_buffer.host_allocation; + list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[1] = heap->desc.NumDescriptors; } for (j = 0; j < bindless_state->set_count; j++) + { if (bindless_state->set_info[j].heap_type == heap->desc.Type) - list->descriptor_heap.buffers.vk_offsets[j] = heap->descriptor_buffer.offsets[set_index++]; + { + list->descriptor_heap.buffers.vk_bind_offsets[j] = heap->descriptor_buffer.offsets[set_index]; + list->descriptor_heap.buffers.vk_payload_offsets[j] = + heap->descriptor_buffer.offsets[set_index] + bindless_state->set_info[j].host_mapping_offset; + list->descriptor_heap.buffers.vk_descriptor_stride_words[j] = + bindless_state->set_info[j].host_mapping_descriptor_size / sizeof(uint32_t); + set_index++; + } + } } - if (current_resource_va == list->descriptor_heap.buffers.heap_va_resource && - current_sampler_va == list->descriptor_heap.buffers.heap_va_sampler) + if (current_resource_va == list->descriptor_heap.buffers.heap_va[0] && + current_sampler_va == list->descriptor_heap.buffers.heap_va[1]) return; list->descriptor_heap.buffers.heap_dirty = true; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index c7a1054f42..f22833cc86 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2898,6 +2898,7 @@ static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, e alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; alloc_info.vk_memory_priority = vkd3d_convert_to_vk_prio(D3D12_RESIDENCY_PRIORITY_NORMAL); + alloc_info.explicit_global_buffer_usage = 0; if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, &alloc_info, &scratch->allocation))) @@ -2935,6 +2936,31 @@ static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, e alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; alloc_info.vk_memory_priority = vkd3d_convert_to_vk_prio(D3D12_RESIDENCY_PRIORITY_NORMAL); + alloc_info.explicit_global_buffer_usage = 0; + + if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, + &alloc_info, &scratch->allocation))) + return hr; + } + else if (kind == VKD3D_SCRATCH_POOL_KIND_DESCRIPTOR_BUFFER) + { + struct vkd3d_allocate_heap_memory_info alloc_info; + + /* We only care about memory types for INDIRECT_PREPROCESS. */ + assert(memory_types == ~0u); + + memset(&alloc_info, 0, sizeof(alloc_info)); + alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; /* TODO: DEFAULT */ + alloc_info.heap_desc.SizeInBytes = size; + alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + alloc_info.extra_allocation_flags = + VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH; + alloc_info.vk_memory_priority = vkd3d_convert_to_vk_prio(D3D12_RESIDENCY_PRIORITY_NORMAL); + alloc_info.explicit_global_buffer_usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator, &alloc_info, &scratch->allocation))) diff --git a/libs/vkd3d/heap.c b/libs/vkd3d/heap.c index f7818abbe4..fa971b26d4 100644 --- a/libs/vkd3d/heap.c +++ b/libs/vkd3d/heap.c @@ -280,6 +280,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev alloc_info.heap_desc = heap->desc; alloc_info.host_ptr = host_address; alloc_info.extra_allocation_flags = 0; + alloc_info.explicit_global_buffer_usage = 0; if (FAILED(hr = vkd3d_private_store_init(&heap->private_store))) return hr; diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c index 22528a4108..e817b9e9ec 100644 --- a/libs/vkd3d/memory.c +++ b/libs/vkd3d/memory.c @@ -1766,6 +1766,7 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem alloc_info.heap_flags = info->heap_desc.Flags; alloc_info.host_ptr = info->host_ptr; alloc_info.vk_memory_priority = info->vk_memory_priority; + alloc_info.explicit_global_buffer_usage = info->explicit_global_buffer_usage; alloc_info.flags |= info->extra_allocation_flags; if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 0ec62cee79..e40073476a 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -3429,6 +3429,7 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12 allocate_info.heap_desc.SizeInBytes = align(desc->Width, allocate_info.heap_desc.Alignment); allocate_info.heap_desc.Flags = heap_flags | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; allocate_info.vk_memory_priority = object->priority.residency_count ? vkd3d_convert_to_vk_prio(object->priority.d3d12priority) : 0.f; + allocate_info.explicit_global_buffer_usage = 0; /* Be very careful with suballocated buffers. */ if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV) && diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 9c0cf99502..30f330f8c6 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -653,6 +653,7 @@ struct vkd3d_allocate_heap_memory_info void *host_ptr; uint32_t extra_allocation_flags; float vk_memory_priority; + VkBufferUsageFlags explicit_global_buffer_usage; }; struct vkd3d_allocate_resource_memory_info @@ -2398,6 +2399,7 @@ enum vkd3d_scratch_pool_kind VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0, VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS, VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, + VKD3D_SCRATCH_POOL_KIND_DESCRIPTOR_BUFFER, VKD3D_SCRATCH_POOL_KIND_COUNT }; @@ -2728,12 +2730,15 @@ union vkd3d_descriptor_heap_state { struct { - VkDeviceAddress heap_va_resource; - VkDeviceAddress heap_va_sampler; + VkDeviceAddress heap_va[2]; + void *mapped[2]; + uint32_t vk_descriptor_count_for_buffer_index[2]; VkBuffer vk_buffer_resource; bool heap_dirty; - VkDeviceSize vk_offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + VkDeviceSize vk_bind_offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + uint32_t vk_payload_offsets[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + uint32_t vk_descriptor_stride_words[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; } buffers; struct @@ -2802,15 +2807,19 @@ struct d3d12_command_list_descriptor_copy_word /* A batch is started when application sets descriptor heap. */ struct d3d12_command_list_descriptor_copy_batch { - /* Represents the descriptor heaps. */ - VkDeviceAddress base_va[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - /* Used to detect OOB descriptor copies. Can replace with null descriptor. */ - uint32_t num_descriptors[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; + /* Represents the descriptor heaps. When descriptor heaps are rebound (rare), + * need to start a new batch. */ + struct + { + VkDeviceAddress base_va; + uint32_t num_descriptors; + uint32_t stride_words; + } heaps[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_WORDS * sizeof(uint32_t) worth of descriptors. */ - struct vkd3d_scratch_buffer descriptor_buffer; + struct vkd3d_scratch_allocation descriptor_buffer; /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_NUM_WORD_COPIES worth of d3d12_command_list_descriptor_copy_word. */ - struct vkd3d_scratch_buffer host_buffer; + struct vkd3d_scratch_allocation host_buffer; unsigned int descriptor_buffer_offset; unsigned int num_copies; From 08115413a8258a65bf458a43b4f65bfb375b6fa8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 21 Nov 2023 16:47:15 +0100 Subject: [PATCH 09/27] fix alloc size. --- libs/vkd3d/command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 3d0f45c5ae..93c1554d9f 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5972,8 +5972,8 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, - VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES, 64, - ~0u, &batch->host_buffer); + VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES * sizeof(struct d3d12_command_list_descriptor_copy_word), + 64, ~0u, &batch->host_buffer); batch->descriptor_buffer_offset = 0; batch->num_copies = 0; From f80898250c3bc00b854be4e908e7b6a48c6b98f2 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 12:12:36 +0100 Subject: [PATCH 10/27] Add separate bit for hoisted buffer descriptors. Clarify the difference between that and push descriptor hoisting. --- libs/vkd3d/command.c | 16 ++++++++-------- libs/vkd3d/vkd3d_private.h | 7 ++++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 93c1554d9f..1d182266ea 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4646,7 +4646,7 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l if (bindings->root_signature->vk_sampler_descriptor_layout) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; if (bindings->root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; d3d12_command_list_invalidate_push_constants(bindings); @@ -6254,7 +6254,7 @@ static void d3d12_command_list_update_root_descriptors(struct d3d12_command_list } } -static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_list *list, +static void d3d12_command_list_update_hoisted_push_descriptors(struct d3d12_command_list *list, struct vkd3d_pipeline_bindings *bindings) { const struct d3d12_root_signature *rs = bindings->root_signature; @@ -6307,7 +6307,7 @@ static void d3d12_command_list_update_hoisted_descriptors(struct d3d12_command_l } } - bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; + bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list) @@ -6335,8 +6335,8 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis d3d12_command_list_update_static_samplers(list, bindings, vk_bind_point, layout); /* If we can, hoist descriptors from the descriptor heap into fake root parameters. */ - if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS) - d3d12_command_list_update_hoisted_descriptors(list, bindings); + if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS) + d3d12_command_list_update_hoisted_push_descriptors(list, bindings); if (bind_point_layout->flags & VKD3D_ROOT_SIGNATURE_USE_PUSH_CONSTANT_UNIFORM_BLOCK) { @@ -9554,7 +9554,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(d3d12_command_lis static void vkd3d_pipeline_bindings_set_dirty_sets(struct vkd3d_pipeline_bindings *bindings, uint64_t dirty_mask) { bindings->descriptor_heap_dirty_mask = dirty_mask; - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } static void d3d12_command_list_set_descriptor_heaps_buffers(struct d3d12_command_list *list, @@ -9730,7 +9730,7 @@ static inline void d3d12_command_list_set_descriptor_table_embedded(struct d3d12 if (root_signature->descriptor_table_count) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; if (root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } VKD3D_BREADCRUMB_AUX32(index); @@ -9751,7 +9751,7 @@ static inline void d3d12_command_list_set_descriptor_table(struct d3d12_command_ if (root_signature->descriptor_table_count) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; if (root_signature->hoist_info.num_desc) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS; + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } VKD3D_BREADCRUMB_AUX32(index); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 30f330f8c6..6b16f407f4 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2456,9 +2456,10 @@ bool d3d12_command_allocator_allocate_query_from_type_index( enum vkd3d_pipeline_dirty_flag { - VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u, - VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u, - VKD3D_PIPELINE_DIRTY_HOISTED_DESCRIPTORS = 0x00000004u, + VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET = 0x00000001u, + VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS = 0x00000002u, + VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS = 0x00000004u, + VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS = 0x00000008u, }; struct vkd3d_root_descriptor_info From 7749b49259dc26d459df62ff9f82843973985628 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 13:20:13 +0100 Subject: [PATCH 11/27] Build batch data structures. --- libs/vkd3d/command.c | 111 ++++++++++++++++++++++++++++++++++++- libs/vkd3d/state.c | 19 +++++++ libs/vkd3d/vkd3d_private.h | 5 +- 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 1d182266ea..d4f220087b 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4647,6 +4647,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; if (bindings->root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; + if (list->state && list->state->hoist_template.num_hoist_sets) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; d3d12_command_list_invalidate_push_constants(bindings); @@ -5887,6 +5889,66 @@ static void d3d12_command_list_update_descriptor_table_offsets(struct d3d12_comm bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; } +static void d3d12_command_list_update_hoisted_buffer_descriptors(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, + VkPipelineBindPoint vk_bind_point) +{ + const struct d3d12_root_signature *root_signature = bindings->root_signature; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_descriptor_copy_template *copy_template; + struct d3d12_command_list_descriptor_copy_desc *copy_desc; + const struct d3d12_descriptor_copy_template_entry *entry; + struct d3d12_command_list_descriptor_copy_batch *batch; + static const uint32_t vk_buffer_indices[2] = { 2, 2 }; + const struct vkd3d_shader_descriptor_table *table; + uint32_t table_offsets[D3D12_MAX_ROOT_COST]; + unsigned int root_parameter_index; + uint64_t descriptor_table_mask; + VkDeviceSize vk_offsets[2]; + uint16_t base_dst_offset; + unsigned int i; + + assert(list->copy_batches_count); + batch = &list->copy_batches[list->copy_batches_count - 1]; + copy_template = &list->state->hoist_template; + + copy_desc = batch->host_buffer.host_ptr; + copy_desc += batch->num_copies; + + descriptor_table_mask = copy_template->hoist_root_parameter_index_mask; + + while (descriptor_table_mask) + { + root_parameter_index = vkd3d_bitmask_iter64(&descriptor_table_mask); + table = root_signature_get_descriptor_table(root_signature, root_parameter_index); + table_offsets[table->table_index] = bindings->descriptor_tables[root_parameter_index]; + } + + base_dst_offset = batch->descriptor_buffer_offset / sizeof(uint32_t); + + for (i = 0; i < copy_template->num_entries; i++) + { + entry = ©_template->entries[i]; + copy_desc->set_index = entry->set_index; + copy_desc->count = entry->count; + copy_desc->src_offset = table_offsets[entry->table_index] + entry->constant_offset; + copy_desc->dst_offset = base_dst_offset + entry->dst_offset_words; + batch->num_copies++; + } + + for (i = 0; i < copy_template->num_hoist_sets; i++) + vk_offsets[i] = batch->descriptor_buffer_offset + copy_template->descriptor_offsets[i]; + + /* This pipeline layout is compatible with the root signature so pushing here will not disturb anything. */ + VK_CALL(vkCmdSetDescriptorBufferOffsetsEXT(list->cmd.vk_command_buffer, vk_bind_point, + copy_template->vk_hoist_descriptor_layout, + copy_template->first_hoist_set_index, copy_template->num_hoist_sets, + vk_buffer_indices, vk_offsets)); + + batch->descriptor_buffer_offset += copy_template->descriptor_allocation_size; + bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; +} + static void vk_write_descriptor_set_from_root_descriptor(struct d3d12_command_list *list, VkWriteDescriptorSet *vk_descriptor_write, const struct vkd3d_shader_root_parameter *root_parameter, const struct vkd3d_root_descriptor_info *descriptor) @@ -5972,7 +6034,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, - VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES * sizeof(struct d3d12_command_list_descriptor_copy_word), + VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES * sizeof(struct d3d12_command_list_descriptor_copy_desc), 64, ~0u, &batch->host_buffer); batch->descriptor_buffer_offset = 0; @@ -6310,6 +6372,27 @@ static void d3d12_command_list_update_hoisted_push_descriptors(struct d3d12_comm bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } +static void d3d12_command_list_reserve_hoisted_buffer_descriptor(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings) +{ + assert(list->state); + assert(list->state->hoist_template.num_hoist_sets); + + /* Check if we have exhausted the current batch. */ + if (list->copy_batches_count == 0 || + (list->state->hoist_template.descriptor_allocation_size + + list->copy_batches[list->copy_batches_count - 1].descriptor_buffer_offset > + VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE) || + (list->state->hoist_template.num_entries + list->copy_batches[list->copy_batches_count - 1].num_copies > + VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES)) + { + list->descriptor_heap.buffers.heap_dirty = true; + bindings->descriptor_heap_dirty_mask |= ~0u; + /* d3d12_command_list_update_descriptor_heaps will be called. + * Since heap_dirty it set it will rebind descriptor buffers, and ensure misc descriptor state is marked dirty. */ + } +} + static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list) { struct vkd3d_pipeline_bindings *bindings = d3d12_command_list_get_bindings(list, list->active_pipeline_type); @@ -6328,6 +6411,9 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis vk_bind_point = vk_bind_point_from_pipeline_type(list->active_pipeline_type); + if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS) + d3d12_command_list_reserve_hoisted_buffer_descriptor(list, bindings); + if (bindings->descriptor_heap_dirty_mask) d3d12_command_list_update_descriptor_heaps(list, bindings, vk_bind_point, layout); @@ -6362,6 +6448,9 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS) d3d12_command_list_update_descriptor_table_offsets(list, bindings, layout, push_stages); } + + if (bindings->dirty_flags & VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS) + d3d12_command_list_update_hoisted_buffer_descriptors(list, bindings, vk_bind_point); } static void d3d12_command_list_update_descriptors_post_indirect_buffer(struct d3d12_command_list *list) @@ -8867,6 +8956,14 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_ list->dynamic_state.dirty_flags |= VKD3D_DYNAMIC_STATE_STENCIL_WRITE_MASK; } } + + /* TODO: It's possible we can use some kind of compatibilty hash of the hoist descriptor + * to avoid hoisting, but changing pipeline without modifying the root table parameters + * is very awkward. */ + if (state && state->hoist_template.num_hoist_sets) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; + else + bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; } VkImageLayout vk_image_layout_from_d3d12_resource_state( @@ -9728,7 +9825,12 @@ static inline void d3d12_command_list_set_descriptor_table_embedded(struct d3d12 if (root_signature) { if (root_signature->descriptor_table_count) + { bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; + if (list->state && (list->state->hoist_template.hoist_root_parameter_index_mask & (1ull << index))) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; + } + if (root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } @@ -9749,7 +9851,12 @@ static inline void d3d12_command_list_set_descriptor_table(struct d3d12_command_ if (root_signature) { if (root_signature->descriptor_table_count) + { bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_DESCRIPTOR_TABLE_OFFSETS; + if (list->state && (list->state->hoist_template.hoist_root_parameter_index_mask & (1ull << index))) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; + } + if (root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; } @@ -14499,6 +14606,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(d3d12_command } } #endif + + list->compute_bindings.dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; } static VkStridedDeviceAddressRegionKHR convert_strided_range( diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index b0e0bbf26c..7abdcfd42c 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1551,6 +1551,7 @@ static HRESULT d3d12_root_signature_create_hoisted_descriptor_set_layout( &vk_set_layout))) return hr; + /* Align this to simplify the algorithm that checks for exhaustion. */ copy_template->descriptor_allocation_size = align( copy_template->descriptor_allocation_size, device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment); @@ -1619,6 +1620,7 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( { VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS + VKD3D_MAX_HOIST_SHADER_STAGES]; unsigned int num_sets; + uint64_t table_mask; unsigned int i; HRESULT hr; @@ -1652,6 +1654,23 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( ©_template->vk_hoist_descriptor_layout))) return hr; + table_mask = 0; + for (i = 0; i < copy_template->num_entries; i++) + table_mask |= 1ull << copy_template->entries[i].table_index; + + /* Compute a mask for which root parameter indices we're sensitive to, + * so we don't flush hoisted buffer descriptors more than we need to. + * Use root parameter index rather than table index since it's slower to look up table index + * than just checking root parameter directly. */ + for (i = 0; i < root_signature->parameter_count; i++) + { + if (root_signature->parameters[i].parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE && + (table_mask & (1ull << root_signature->parameters[i].descriptor_table.table_index))) + { + copy_template->hoist_root_parameter_index_mask |= 1ull << i; + } + } + return S_OK; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 6b16f407f4..6f7332c1eb 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2067,6 +2067,7 @@ struct d3d12_descriptor_copy_template VkDeviceSize descriptor_allocation_size; unsigned int num_hoist_sets; unsigned int first_hoist_set_index; + uint64_t hoist_root_parameter_index_mask; }; /* ID3D12PipelineState */ @@ -2790,14 +2791,14 @@ struct d3d12_command_list_sequence }; #define VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE (64 * 1024) -#define VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES ((64 * 1024) / sizeof(struct d3d12_command_list_descriptor_copy_word)) +#define VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES ((64 * 1024) / sizeof(struct d3d12_command_list_descriptor_copy_desc)) /* Represents a copy where we do: * for i in range(count): * store_u32(descriptor_buffer + (dst_offset + i) * sizeof(uint32_t), * load_u32(base_va[set_index] + (src_offset + i) * sizeof(uint32_t))); */ -struct d3d12_command_list_descriptor_copy_word +struct d3d12_command_list_descriptor_copy_desc { uint32_t src_offset; uint16_t dst_offset; From 3ceef752c4ffa4a65888081de761c976546c0d13 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 13:21:49 +0100 Subject: [PATCH 12/27] Clean up copy batch lists. --- libs/vkd3d/command.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d4f220087b..58c5f7646e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1918,6 +1918,7 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm } list->cmd.iteration_count = 1; + list->copy_batches_count = 0; #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) @@ -5010,6 +5011,7 @@ ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *ifa vkd3d_free(list->dsv_resource_tracking); vkd3d_free(list->subresource_tracking); vkd3d_free(list->query_resolves); + vkd3d_free(list->copy_batches); hash_map_free(&list->query_resolve_lut); d3d12_command_list_free_rtas_batch(list); From 2c98f669ce5e339c8215339e95d09986b116ee2e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 13:40:49 +0100 Subject: [PATCH 13/27] Only invalidate buffer descriptors when heap is invalidated. --- libs/vkd3d/command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 58c5f7646e..9836a123e0 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4648,8 +4648,6 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_STATIC_SAMPLER_SET; if (bindings->root_signature->hoist_info.num_desc) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_PUSH_DESCRIPTORS; - if (list->state && list->state->hoist_template.num_hoist_sets) - bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; d3d12_command_list_invalidate_push_constants(bindings); @@ -4657,6 +4655,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l { struct d3d12_device *device = bindings->root_signature->device; bindings->descriptor_heap_dirty_mask = (1ull << device->bindless_state.set_count) - 1; + if (list->state && list->state->hoist_template.num_hoist_sets) + bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; } } From 42837e29ecf815b2bce20e1660b43df4261a5143 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 14:03:43 +0100 Subject: [PATCH 14/27] Resolve hoist on CPU timeline. --- libs/vkd3d/command.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 9836a123e0..847037a15e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5935,6 +5935,8 @@ static void d3d12_command_list_update_hoisted_buffer_descriptors(struct d3d12_co copy_desc->count = entry->count; copy_desc->src_offset = table_offsets[entry->table_index] + entry->constant_offset; copy_desc->dst_offset = base_dst_offset + entry->dst_offset_words; + + copy_desc++; batch->num_copies++; } @@ -15870,6 +15872,25 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command d3d12_command_queue_add_submission(command_queue, &sub); } +static void d3d12_command_list_descriptor_copy_batch_flush_cpu(struct d3d12_command_list_descriptor_copy_batch *batch) +{ + /* Emulate what we intend to do in async compute. */ + const struct d3d12_command_list_descriptor_copy_desc *src = batch->host_buffer.host_ptr; + uint32_t *dst = batch->descriptor_buffer.host_ptr; + VkDeviceAddress va; + unsigned int i; + + for (i = 0; i < batch->num_copies; i++, src++) + { + if (src->src_offset < batch->heaps[src->set_index].num_descriptors) + { + va = batch->heaps[src->set_index].base_va + + src->src_offset * batch->heaps[src->set_index].stride_words * sizeof(uint32_t); + memcpy(dst + src->dst_offset, (const void *)va, src->count * sizeof(uint32_t)); + } + } +} + static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { @@ -15994,6 +16015,10 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffer->commandBuffer = cmd_list->cmd.iterations[iter].vk_command_buffer; } + /* TODO: Move this to GPU timeline */ + for (iter = 0; iter < cmd_list->copy_batches_count; iter++) + d3d12_command_list_descriptor_copy_batch_flush_cpu(&cmd_list->copy_batches[iter]); + if (cmd_list->debug_capture) sub.execute.debug_capture = true; From d9dc4df7c3711988f1022b2defd15083a0a204b5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 14:26:31 +0100 Subject: [PATCH 15/27] Add hoist flag. --- libs/vkd3d/state.c | 39 ++++++++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 1 + 2 files changed, 40 insertions(+) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 7abdcfd42c..e51809d8ac 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -6210,6 +6210,42 @@ static bool vkd3d_bindless_supports_embedded_packed_metadata(struct d3d12_device vkd3d_bindless_get_mutable_descriptor_type_size(device); } +static bool vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(uint32_t size) +{ + return size % 4 == 0 && size >= 4; +} + +static bool vkd3d_bindless_supports_descriptor_buffer_hoisting(struct d3d12_device *device, uint32_t flags) +{ + const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props; + if (d3d12_device_uses_descriptor_buffers(device) && + (flags & VKD3D_BINDLESS_RAW_SSBO) && + !(flags & VKD3D_SSBO_OFFSET_BUFFER)) + { + props = &device->device_info.descriptor_buffer_properties; + if (!vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->samplerDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->sampledImageDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->storageImageDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->robustStorageBufferDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->robustUniformBufferDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->robustUniformTexelBufferDescriptorSize) || + !vkd3d_bindless_descriptor_size_supports_descriptor_buffer_hoisting(props->robustStorageTexelBufferDescriptorSize)) + return false; + + if (props->maxDescriptorBufferBindings < 3 || + props->maxResourceDescriptorBufferBindings < 2 || + props->maxSamplerDescriptorBufferBindings < 2 || + !props->bufferlessPushDescriptors) + return false; + + /* If we pass all these tests, we're on an implementation that does not really care if we rebind descriptor buffers, + * because we're not using true descriptor heaps. */ + return true; + } + else + return false; +} + bool vkd3d_bindless_supports_embedded_mutable_type(struct d3d12_device *device, uint32_t flags) { const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props = &device->device_info.descriptor_buffer_properties; @@ -6502,6 +6538,9 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev flags |= VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED; } + if (vkd3d_bindless_supports_descriptor_buffer_hoisting(device, flags)) + flags |= VKD3D_BINDLESS_HOIST_DESCRIPTOR_BUFFER; + return flags; } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 6f7332c1eb..be203c658e 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -3667,6 +3667,7 @@ enum vkd3d_bindless_flags VKD3D_BINDLESS_MUTABLE_EMBEDDED_PACKED_METADATA = (1u << 10), VKD3D_FORCE_COMPUTE_ROOT_PARAMETERS_PUSH_UBO = (1u << 11), VKD3D_BINDLESS_MUTABLE_TYPE_SPLIT_RAW_TYPED = (1u << 12), + VKD3D_BINDLESS_HOIST_DESCRIPTOR_BUFFER = (1u << 13), }; #define VKD3D_BINDLESS_SET_MAX_EXTRA_BINDINGS 8 From d0d362266de0df792c0f994fc067adf7f2a4a262 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 14:48:52 +0100 Subject: [PATCH 16/27] Add layout to graphics as well. --- libs/vkd3d/command.c | 2 +- libs/vkd3d/state.c | 40 +++++++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 847037a15e..c2a14c51a9 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6023,7 +6023,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li num_global_buffers = 2; - /* if (using hoisting) */ + if (list->device->bindless_state.flags & VKD3D_BINDLESS_HOIST_DESCRIPTOR_BUFFER) { /* If we're doing hoisting, allocate a new chunk now. */ vkd3d_array_reserve((void **)&list->copy_batches, &list->copy_batches_size, diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index e51809d8ac..bf77f87708 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1671,6 +1671,16 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( } } + /* If the first set is empty, don't push an offset to it. + * VVL gets a bit confused when you push a buffer offset to an empty set layout. */ + if (first_set_count == 0) + { + assert(copy_template->num_hoist_sets == 2); + copy_template->num_hoist_sets -= 1; + copy_template->first_hoist_set_index += 1; + copy_template->descriptor_offsets[0] = copy_template->descriptor_offsets[1]; + } + return S_OK; } @@ -2618,16 +2628,20 @@ static void d3d12_pipeline_state_init_shader_interface(struct d3d12_pipeline_sta shader_interface->descriptor_size_sampler = d3d12_device_get_descriptor_handle_increment_size( device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - /* Ignore tess, geom and task. Only bother with the common cases to keep number of sets down. */ - if (/*stage == VK_SHADER_STAGE_VERTEX_BIT || stage == VK_SHADER_STAGE_MESH_BIT_EXT || - stage == VK_SHADER_STAGE_FRAGMENT_BIT || */stage == VK_SHADER_STAGE_COMPUTE_BIT) + if (device->bindless_state.flags & VKD3D_BINDLESS_HOIST_DESCRIPTOR_BUFFER) { - layout = d3d12_root_signature_get_layout(root_signature, state->pipeline_type); - shader_interface->flags |= VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS; - shader_interface->hoist_descriptor_set_index = layout->num_set_layouts; - /* Make the set only depend on the root signature. */ - if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) - shader_interface->hoist_descriptor_set_index++; + /* Ignore tess, geom and task, which are all quite rare. + * Only bother with the common cases to keep number of sets down. */ + if (stage == VK_SHADER_STAGE_VERTEX_BIT || stage == VK_SHADER_STAGE_MESH_BIT_EXT || + stage == VK_SHADER_STAGE_FRAGMENT_BIT || stage == VK_SHADER_STAGE_COMPUTE_BIT) + { + layout = d3d12_root_signature_get_layout(root_signature, state->pipeline_type); + shader_interface->flags |= VKD3D_SHADER_INTERFACE_HOIST_DESCRIPTORS; + shader_interface->hoist_descriptor_set_index = layout->num_set_layouts; + /* Make the set only depend on the root signature. */ + if (stage == VK_SHADER_STAGE_FRAGMENT_BIT) + shader_interface->hoist_descriptor_set_index++; + } } if (stage == VK_SHADER_STAGE_MESH_BIT_EXT) @@ -4936,6 +4950,10 @@ static HRESULT d3d12_pipeline_state_init_graphics_spirv(struct d3d12_pipeline_st /* At this point, we will have valid meta structures set up. * Deduce further PSO information from these structs. */ d3d12_pipeline_state_graphics_handle_meta(state, device); + + if (FAILED(hr = d3d12_pipeline_state_create_hoisted_pipeline_layout(state))) + return hr; + return S_OK; } @@ -4991,6 +5009,10 @@ static HRESULT d3d12_pipeline_state_init_static_pipeline(struct d3d12_pipeline_s graphics->pipeline_layout = state->root_signature->graphics.vk_pipeline_layout; } + /* Override the pipeline layout if we hoist descriptor buffers. */ + if (state->hoist_template.vk_hoist_descriptor_layout) + graphics->pipeline_layout = state->hoist_template.vk_hoist_descriptor_layout; + graphics->pipeline = VK_NULL_HANDLE; graphics->library = VK_NULL_HANDLE; graphics->library_flags = 0; From ac1e40a02c8a65cdeead934298265037ab6e7946 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 15:04:46 +0100 Subject: [PATCH 17/27] Handle unbound state more gracefully. --- libs/vkd3d/command.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index c2a14c51a9..ecfe66a0c2 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -8929,25 +8929,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_ list->state = state; list->rt_state = NULL; - if (!state || list->active_pipeline_type != state->pipeline_type) + if (!state) { - if (state) - { - bindings = d3d12_command_list_get_bindings(list, state->pipeline_type); - if (bindings->root_signature) - { - /* We might have clobbered push constants in the new bind point, - * invalidate all state which can affect push constants. - * We might also change the pipeline layout, in case we switch between mesh and legacy graphics. - * In this scenario, the push constant layout will be incompatible due to stage - * differences, so everything must be rebound. */ - d3d12_command_list_invalidate_root_parameters(list, bindings, true, NULL); - } + list->active_pipeline_type = VKD3D_PIPELINE_TYPE_NONE; + return; + } - list->active_pipeline_type = state->pipeline_type; + bindings = d3d12_command_list_get_bindings(list, state->pipeline_type); + + if (list->active_pipeline_type != state->pipeline_type) + { + if (bindings->root_signature) + { + /* We might have clobbered push constants in the new bind point, + * invalidate all state which can affect push constants. + * We might also change the pipeline layout, in case we switch between mesh and legacy graphics. + * In this scenario, the push constant layout will be incompatible due to stage + * differences, so everything must be rebound. */ + d3d12_command_list_invalidate_root_parameters(list, bindings, true, NULL); } - else - list->active_pipeline_type = VKD3D_PIPELINE_TYPE_NONE; + + list->active_pipeline_type = state->pipeline_type; } if (state->pipeline_type != VKD3D_PIPELINE_TYPE_COMPUTE) @@ -8964,7 +8966,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(d3d12_command_ /* TODO: It's possible we can use some kind of compatibilty hash of the hoist descriptor * to avoid hoisting, but changing pipeline without modifying the root table parameters * is very awkward. */ - if (state && state->hoist_template.num_hoist_sets) + if (state->hoist_template.num_hoist_sets) bindings->dirty_flags |= VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; else bindings->dirty_flags &= ~VKD3D_PIPELINE_DIRTY_HOISTED_BUFFER_DESCRIPTORS; From 71394d2a04b710551c37fca57f0cb6c473022356 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 15:35:17 +0100 Subject: [PATCH 18/27] Fix table index computation. --- libs/vkd3d-shader/dxil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c index be52192d3e..73877396c2 100644 --- a/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d-shader/dxil.c @@ -269,7 +269,7 @@ static void dxil_remap_check_hoist( vk_binding->bindless.heap_root_offset < VKD3D_MAX_HOISTED_DESCRIPTOR_CONSTANT_OFFSET) { hoisted = &remap->meta->hoist_desc[remap->meta->num_hoisted_descriptors]; - hoisted->table_index = vk_binding->root_constant_index - + hoisted->table_index = vk_binding->root_constant_index + remap->num_root_descriptors * 2 - shader_interface_info->descriptor_tables.offset / sizeof(uint32_t); hoisted->constant_offset = vk_binding->bindless.heap_root_offset; hoisted->vk_descriptor_type = vk_descriptor_type; From d82fd5a7b6aea69765e3db3d7bc732a5bbade837 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 22 Nov 2023 17:42:55 +0100 Subject: [PATCH 19/27] Move heap info to a buffer. --- libs/vkd3d/command.c | 23 ++++++++++++++++------- libs/vkd3d/vkd3d_private.h | 20 +++++++++++--------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ecfe66a0c2..bd7ad5efae 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5996,6 +5996,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDescriptorBufferBindingPushDescriptorBufferHandleEXT buffer_handle; struct d3d12_command_list_descriptor_copy_batch *batch; + struct d3d12_command_list_descriptor_copy_heap *heaps; VkDescriptorBufferBindingInfoEXT global_buffers[3]; uint32_t num_global_buffers; unsigned int i; @@ -6041,18 +6042,25 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES * sizeof(struct d3d12_command_list_descriptor_copy_desc), 64, ~0u, &batch->host_buffer); + d3d12_command_allocator_allocate_scratch_memory(list->allocator, + VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, + sizeof(struct d3d12_command_list_descriptor_copy_heap) * VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS, + 64, ~0u, &batch->host_meta_buffer); + batch->descriptor_buffer_offset = 0; batch->num_copies = 0; + heaps = batch->host_meta_buffer.host_ptr; for (i = 0; i < list->device->bindless_state.set_count; i++) { unsigned int buffer_index = list->device->bindless_state.vk_descriptor_buffer_indices[i]; /*batch->heaps[i].base_va = global_buffers[buffer_index].address + list->descriptor_heap.buffers.vk_payload_offsets[i];*/ /* HACK: Use CPU side copy for now. */ - batch->heaps[i].base_va = (VkDeviceAddress) - ((uint8_t *)list->descriptor_heap.buffers.mapped[buffer_index] + list->descriptor_heap.buffers.vk_payload_offsets[i]); - batch->heaps[i].num_descriptors = list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[buffer_index]; - batch->heaps[i].stride_words = list->descriptor_heap.buffers.vk_descriptor_stride_words[i]; + heaps[i].base_va = (VkDeviceAddress) + ((uint8_t *)list->descriptor_heap.buffers.mapped[buffer_index] + + list->descriptor_heap.buffers.vk_payload_offsets[i]); + heaps[i].num_descriptors = list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[buffer_index]; + heaps[i].stride_words = list->descriptor_heap.buffers.vk_descriptor_stride_words[i]; } global_buffers[num_global_buffers].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; @@ -15877,6 +15885,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command static void d3d12_command_list_descriptor_copy_batch_flush_cpu(struct d3d12_command_list_descriptor_copy_batch *batch) { /* Emulate what we intend to do in async compute. */ + const struct d3d12_command_list_descriptor_copy_heap *heaps = batch->host_meta_buffer.host_ptr; const struct d3d12_command_list_descriptor_copy_desc *src = batch->host_buffer.host_ptr; uint32_t *dst = batch->descriptor_buffer.host_ptr; VkDeviceAddress va; @@ -15884,10 +15893,10 @@ static void d3d12_command_list_descriptor_copy_batch_flush_cpu(struct d3d12_comm for (i = 0; i < batch->num_copies; i++, src++) { - if (src->src_offset < batch->heaps[src->set_index].num_descriptors) + if (src->src_offset < heaps[src->set_index].num_descriptors) { - va = batch->heaps[src->set_index].base_va + - src->src_offset * batch->heaps[src->set_index].stride_words * sizeof(uint32_t); + va = heaps[src->set_index].base_va + + src->src_offset * heaps[src->set_index].stride_words * sizeof(uint32_t); memcpy(dst + src->dst_offset, (const void *)va, src->count * sizeof(uint32_t)); } } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index be203c658e..a09c4f7c9f 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2806,23 +2806,25 @@ struct d3d12_command_list_descriptor_copy_desc uint8_t count; }; +struct d3d12_command_list_descriptor_copy_heap +{ + VkDeviceAddress base_va; + uint32_t num_descriptors; + uint32_t stride_words; +}; + /* A batch is started when application sets descriptor heap. */ struct d3d12_command_list_descriptor_copy_batch { - /* Represents the descriptor heaps. When descriptor heaps are rebound (rare), - * need to start a new batch. */ - struct - { - VkDeviceAddress base_va; - uint32_t num_descriptors; - uint32_t stride_words; - } heaps[VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS]; - /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_WORDS * sizeof(uint32_t) worth of descriptors. */ struct vkd3d_scratch_allocation descriptor_buffer; /* Holds VKD3D_DESCRIPTOR_COPY_BATCH_NUM_WORD_COPIES worth of d3d12_command_list_descriptor_copy_word. */ struct vkd3d_scratch_allocation host_buffer; + /* Holds heaps members, VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS. + * We have to dynamically index, so we cannot use push constants. */ + struct vkd3d_scratch_allocation host_meta_buffer; + unsigned int descriptor_buffer_offset; unsigned int num_copies; }; From aa046f632156806b2b4a0365b08b08f827b5a637 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 12:45:19 +0100 Subject: [PATCH 20/27] Refactor the copy batch into an initial transition. --- libs/vkd3d/command.c | 103 ++++++++++++++++++++----------------- libs/vkd3d/vkd3d_private.h | 15 ++++-- 2 files changed, 68 insertions(+), 50 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index bd7ad5efae..f5954d57a8 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1918,7 +1918,6 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm } list->cmd.iteration_count = 1; - list->copy_batches_count = 0; #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) @@ -5011,7 +5010,6 @@ ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *ifa vkd3d_free(list->dsv_resource_tracking); vkd3d_free(list->subresource_tracking); vkd3d_free(list->query_resolves); - vkd3d_free(list->copy_batches); hash_map_free(&list->query_resolve_lut); d3d12_command_list_free_rtas_batch(list); @@ -5167,6 +5165,25 @@ static HRESULT d3d12_command_list_build_init_commands(struct d3d12_command_list return S_OK; } +static void d3d12_command_list_finish_descriptor_copy_batch(struct d3d12_command_list *list) +{ + if (list->descriptor_copy_batch.num_copies) + { + struct vkd3d_initial_transition *transition; + vkd3d_array_reserve((void**)&list->init_transitions, &list->init_transitions_size, + list->init_transitions_count + 1, sizeof(*list->init_transitions)); + + transition = &list->init_transitions[list->init_transitions_count++]; + transition->type = VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH; + transition->descriptor_copy_batch.descriptor_buffer_va = (VkDeviceAddress)list->descriptor_copy_batch.descriptor_buffer.host_ptr; + transition->descriptor_copy_batch.host_buffer_va = (VkDeviceAddress)list->descriptor_copy_batch.host_buffer.host_ptr; + transition->descriptor_copy_batch.host_meta_va = (VkDeviceAddress)list->descriptor_copy_batch.host_meta_buffer.host_ptr; + transition->descriptor_copy_batch.num_copies = list->descriptor_copy_batch.num_copies; + + memset(&list->descriptor_copy_batch, 0, sizeof(list->descriptor_copy_batch)); + } +} + static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_iface *iface) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface); @@ -5192,6 +5209,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_ifa d3d12_command_list_end_current_render_pass(list, false); d3d12_command_list_end_transfer_batch(list); d3d12_command_list_flush_rtas_batch(list); + d3d12_command_list_finish_descriptor_copy_batch(list); if (list->predication.enabled_on_command_buffer) VK_CALL(vkCmdEndConditionalRenderingEXT(list->cmd.vk_command_buffer)); @@ -5497,6 +5515,7 @@ static void d3d12_command_list_reset_internal_state(struct d3d12_command_list *l list->wbi_batch.batch_len = 0; list->query_resolve_count = 0; list->submit_allocator = NULL; + memset(&list->descriptor_copy_batch, 0, sizeof(list->descriptor_copy_batch)); d3d12_command_list_clear_rtas_batch(list); } @@ -5910,8 +5929,7 @@ static void d3d12_command_list_update_hoisted_buffer_descriptors(struct d3d12_co uint16_t base_dst_offset; unsigned int i; - assert(list->copy_batches_count); - batch = &list->copy_batches[list->copy_batches_count - 1]; + batch = &list->descriptor_copy_batch; copy_template = &list->state->hoist_template; copy_desc = batch->host_buffer.host_ptr; @@ -5995,7 +6013,6 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; VkDescriptorBufferBindingPushDescriptorBufferHandleEXT buffer_handle; - struct d3d12_command_list_descriptor_copy_batch *batch; struct d3d12_command_list_descriptor_copy_heap *heaps; VkDescriptorBufferBindingInfoEXT global_buffers[3]; uint32_t num_global_buffers; @@ -6026,30 +6043,25 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li if (list->device->bindless_state.flags & VKD3D_BINDLESS_HOIST_DESCRIPTOR_BUFFER) { - /* If we're doing hoisting, allocate a new chunk now. */ - vkd3d_array_reserve((void **)&list->copy_batches, &list->copy_batches_size, - list->copy_batches_count + 1, sizeof(*list->copy_batches)); - batch = &list->copy_batches[list->copy_batches_count++]; + d3d12_command_list_finish_descriptor_copy_batch(list); d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_DESCRIPTOR_BUFFER, VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE, list->device->device_info.descriptor_buffer_properties.descriptorBufferOffsetAlignment, - ~0u, &batch->descriptor_buffer); + ~0u, &list->descriptor_copy_batch.descriptor_buffer); d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES * sizeof(struct d3d12_command_list_descriptor_copy_desc), - 64, ~0u, &batch->host_buffer); + 64, ~0u, &list->descriptor_copy_batch.host_buffer); d3d12_command_allocator_allocate_scratch_memory(list->allocator, VKD3D_SCRATCH_POOL_KIND_UNIFORM_UPLOAD, sizeof(struct d3d12_command_list_descriptor_copy_heap) * VKD3D_MAX_BINDLESS_DESCRIPTOR_SETS, - 64, ~0u, &batch->host_meta_buffer); + 64, ~0u, &list->descriptor_copy_batch.host_meta_buffer); - batch->descriptor_buffer_offset = 0; - batch->num_copies = 0; - heaps = batch->host_meta_buffer.host_ptr; + heaps = list->descriptor_copy_batch.host_meta_buffer.host_ptr; for (i = 0; i < list->device->bindless_state.set_count; i++) { @@ -6068,7 +6080,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li global_buffers[num_global_buffers].usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - global_buffers[num_global_buffers].address = batch->descriptor_buffer.va; + global_buffers[num_global_buffers].address = list->descriptor_copy_batch.descriptor_buffer.va; num_global_buffers++; } @@ -6391,12 +6403,11 @@ static void d3d12_command_list_reserve_hoisted_buffer_descriptor(struct d3d12_co assert(list->state->hoist_template.num_hoist_sets); /* Check if we have exhausted the current batch. */ - if (list->copy_batches_count == 0 || - (list->state->hoist_template.descriptor_allocation_size + - list->copy_batches[list->copy_batches_count - 1].descriptor_buffer_offset > - VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE) || - (list->state->hoist_template.num_entries + list->copy_batches[list->copy_batches_count - 1].num_copies > - VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES)) + if ((list->state->hoist_template.descriptor_allocation_size + + list->descriptor_copy_batch.descriptor_buffer_offset > + VKD3D_DESCRIPTOR_COPY_BATCH_DESCRIPTOR_BUFFER_SIZE) || + (list->state->hoist_template.num_entries + list->descriptor_copy_batch.num_copies > + VKD3D_DESCRIPTOR_COPY_BATCH_NUM_COPIES)) { list->descriptor_heap.buffers.heap_dirty = true; bindings->descriptor_heap_dirty_mask |= ~0u; @@ -15882,26 +15893,6 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command d3d12_command_queue_add_submission(command_queue, &sub); } -static void d3d12_command_list_descriptor_copy_batch_flush_cpu(struct d3d12_command_list_descriptor_copy_batch *batch) -{ - /* Emulate what we intend to do in async compute. */ - const struct d3d12_command_list_descriptor_copy_heap *heaps = batch->host_meta_buffer.host_ptr; - const struct d3d12_command_list_descriptor_copy_desc *src = batch->host_buffer.host_ptr; - uint32_t *dst = batch->descriptor_buffer.host_ptr; - VkDeviceAddress va; - unsigned int i; - - for (i = 0; i < batch->num_copies; i++, src++) - { - if (src->src_offset < heaps[src->set_index].num_descriptors) - { - va = heaps[src->set_index].base_va + - src->src_offset * heaps[src->set_index].stride_words * sizeof(uint32_t); - memcpy(dst + src->dst_offset, (const void *)va, src->count * sizeof(uint32_t)); - } - } -} - static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, UINT command_list_count, ID3D12CommandList * const *command_lists) { @@ -16026,10 +16017,6 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffer->commandBuffer = cmd_list->cmd.iterations[iter].vk_command_buffer; } - /* TODO: Move this to GPU timeline */ - for (iter = 0; iter < cmd_list->copy_batches_count; iter++) - d3d12_command_list_descriptor_copy_batch_flush_cpu(&cmd_list->copy_batches[iter]); - if (cmd_list->debug_capture) sub.execute.debug_capture = true; @@ -16674,6 +16661,30 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue d3d12_command_queue_transition_pool_add_query_heap(pool, transition->query_heap); break; + case VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH: + { + /* Emulate what we intend to do in async compute. */ + uint32_t *dst = (void *)transition->descriptor_copy_batch.descriptor_buffer_va; + const struct d3d12_command_list_descriptor_copy_desc *src = + (const void *)transition->descriptor_copy_batch.host_buffer_va; + const struct d3d12_command_list_descriptor_copy_heap *heaps = + (const void *)transition->descriptor_copy_batch.host_meta_va; + VkDeviceAddress va; + unsigned int j; + + for (j = 0; j < transition->descriptor_copy_batch.num_copies; j++, src++) + { + if (src->src_offset < heaps[src->set_index].num_descriptors) + { + va = heaps[src->set_index].base_va + + src->src_offset * heaps[src->set_index].stride_words * sizeof(uint32_t); + memcpy(dst + src->dst_offset, (const void *)va, src->count * sizeof(uint32_t)); + } + } + + break; + } + default: ERR("Unhandled transition type %u.\n", transition->type); } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index a09c4f7c9f..46a704c202 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2544,6 +2544,15 @@ enum vkd3d_initial_transition_type { VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE, VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP, + VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH, +}; + +struct d3d12_command_list_descriptor_copy_batch_meta_args +{ + VkDeviceAddress descriptor_buffer_va; + VkDeviceAddress host_buffer_va; + VkDeviceAddress host_meta_va; + uint32_t num_copies; }; struct vkd3d_initial_transition @@ -2557,6 +2566,7 @@ struct vkd3d_initial_transition bool perform_initial_transition; } resource; struct d3d12_query_heap *query_heap; + struct d3d12_command_list_descriptor_copy_batch_meta_args descriptor_copy_batch; }; }; @@ -2854,10 +2864,7 @@ struct d3d12_command_list } index_buffer; struct d3d12_command_list_sequence cmd; - - struct d3d12_command_list_descriptor_copy_batch *copy_batches; - size_t copy_batches_count; - size_t copy_batches_size; + struct d3d12_command_list_descriptor_copy_batch descriptor_copy_batch; struct d3d12_rtv_desc rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; struct d3d12_rtv_desc dsv; From 1d0905ace3e5efda5698b185f8cad9236fd8416a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 13:16:13 +0100 Subject: [PATCH 21/27] Add meta shader for copy descriptors. --- libs/vkd3d/meson.build | 4 +- libs/vkd3d/shaders/cs_copy_descriptors.comp | 88 +++++++++++++++++++++ libs/vkd3d/vkd3d_shaders.h | 1 + 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 libs/vkd3d/shaders/cs_copy_descriptors.comp diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 54955a89e8..8cffb0078e 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -40,7 +40,9 @@ vkd3d_shaders =[ 'shaders/fs_sampler_feedback_decode_image_mip_used.frag', 'shaders/cs_sampler_feedback_encode_buffer_min_mip.comp', 'shaders/cs_sampler_feedback_encode_image_min_mip.comp', - 'shaders/cs_sampler_feedback_encode_image_mip_used.comp' + 'shaders/cs_sampler_feedback_encode_image_mip_used.comp', + + 'shaders/cs_copy_descriptors.comp', ] vkd3d_src = [ diff --git a/libs/vkd3d/shaders/cs_copy_descriptors.comp b/libs/vkd3d/shaders/cs_copy_descriptors.comp new file mode 100644 index 0000000000..1f4a43a99d --- /dev/null +++ b/libs/vkd3d/shaders/cs_copy_descriptors.comp @@ -0,0 +1,88 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(local_size_x = 64) in; + +layout(buffer_reference_align = 4, buffer_reference, std430) readonly buffer SourceHeap +{ + uint data[]; +}; + +layout(buffer_reference_align = 4, buffer_reference, std430) writeonly buffer DestHeap +{ + uint data[]; +}; + +struct BoundHeap +{ + SourceHeap heap; + uint num_descriptors; + uint stride_words; +}; + +layout(buffer_reference_align = 64, buffer_reference, std430) readonly buffer HostMeta +{ + BoundHeap heaps[]; +}; + +struct DescriptorCopy +{ + uint src_offset; + uint dst_offset_set_index_count; +}; + +layout(buffer_reference_align = 64, buffer_reference, std430) readonly buffer CopyBuffer +{ + DescriptorCopy copies[]; +}; + +layout(push_constant, std430) uniform Registers +{ + DestHeap dst; + CopyBuffer src; + HostMeta meta; + uint num_copies; +} registers; + +void main() +{ + uint index = gl_GlobalInvocationID.x; + if (index < registers.num_copies) + { + restrict DestHeap dst = registers.dst; + DescriptorCopy src_copy = registers.src.copies[index]; + uint dst_offset = bitfieldExtract(src_copy.dst_offset_set_index_count, 0, 16); + uint set_index = bitfieldExtract(src_copy.dst_offset_set_index_count, 16, 8); + uint word_count = bitfieldExtract(src_copy.dst_offset_set_index_count, 24, 8); + + BoundHeap heap_meta = registers.meta.heaps[set_index]; + bool in_bounds = src_copy.src_offset < heap_meta.num_descriptors; + + if (in_bounds) + { + uint src_offset = src_copy.src_offset * heap_meta.stride_words; + // We know count is not 0. + uint i = 0; + do + { + dst.data[dst_offset] = heap_meta.heap.data[src_offset]; + i++; + dst_offset++; + src_offset++; + } while (i < word_count); + } + else + { + // memset. Technically not a valid null descriptor, but in practice, it's good enough. + // It's UB to access OOB anyways, so we should never hit this path *and* have an application actually access the descriptor. + // If we really care, we can pass down null payloads. + uint i = 0; + do + { + dst.data[dst_offset] = 0; + i++; + dst_offset++; + } while (i < word_count); + } + } +} diff --git a/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/vkd3d_shaders.h index 4c99b1e7fd..2ec1919e79 100644 --- a/libs/vkd3d/vkd3d_shaders.h +++ b/libs/vkd3d/vkd3d_shaders.h @@ -66,5 +66,6 @@ enum vkd3d_meta_copy_mode #include #include #include +#include #endif /* __VKD3D_SPV_SHADERS_H */ From 10721f0f712018f71d4341ef19b89a0f88d27906 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 13:53:05 +0100 Subject: [PATCH 22/27] Add meta path for descriptor copy. --- libs/vkd3d/command.c | 67 +++++++++------------ libs/vkd3d/meta.c | 45 ++++++++++++++ libs/vkd3d/shaders/cs_copy_descriptors.comp | 2 +- libs/vkd3d/vkd3d_private.h | 25 +++++++- 4 files changed, 99 insertions(+), 40 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index f5954d57a8..3318993ecc 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -5175,9 +5175,9 @@ static void d3d12_command_list_finish_descriptor_copy_batch(struct d3d12_command transition = &list->init_transitions[list->init_transitions_count++]; transition->type = VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH; - transition->descriptor_copy_batch.descriptor_buffer_va = (VkDeviceAddress)list->descriptor_copy_batch.descriptor_buffer.host_ptr; - transition->descriptor_copy_batch.host_buffer_va = (VkDeviceAddress)list->descriptor_copy_batch.host_buffer.host_ptr; - transition->descriptor_copy_batch.host_meta_va = (VkDeviceAddress)list->descriptor_copy_batch.host_meta_buffer.host_ptr; + transition->descriptor_copy_batch.descriptor_buffer_va = list->descriptor_copy_batch.descriptor_buffer.va; + transition->descriptor_copy_batch.host_buffer_va = list->descriptor_copy_batch.host_buffer.va; + transition->descriptor_copy_batch.host_meta_va = list->descriptor_copy_batch.host_meta_buffer.va; transition->descriptor_copy_batch.num_copies = list->descriptor_copy_batch.num_copies; memset(&list->descriptor_copy_batch, 0, sizeof(list->descriptor_copy_batch)); @@ -16053,17 +16053,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm buffer->commandBuffer = command_queue->vkd3d_queue->barrier_command_buffer; } - if (command_list_count == 1 && num_transitions != 0) - { - /* Pilfer directly. */ - cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[0]); - sub.execute.transitions = cmd_list->init_transitions; - sub.execute.transition_count = cmd_list->init_transitions_count; - cmd_list->init_transitions = NULL; - cmd_list->init_transitions_count = 0; - cmd_list->init_transitions_size = 0; - } - else if (num_transitions != 0) + if (num_transitions != 0) { sub.execute.transitions = vkd3d_malloc(num_transitions * sizeof(*sub.execute.transitions)); sub.execute.transition_count = num_transitions; @@ -16618,6 +16608,20 @@ static void d3d12_command_queue_init_query_heap(struct d3d12_device *device, VkC } } +static void d3d12_command_queue_copy_descriptor_batch(struct d3d12_device *device, + VkCommandBuffer vk_cmd_buffer, const struct vkd3d_descriptor_copy_meta_args *args) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_descriptor_copy_info info; + + vkd3d_meta_get_descriptor_copy_pipeline(&device->meta_ops, &info); + VK_CALL(vkCmdBindPipeline(vk_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, info.vk_pipeline)); + VK_CALL(vkCmdPushConstants(vk_cmd_buffer, info.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(*args), args)); + VK_CALL(vkCmdDispatch(vk_cmd_buffer, vkd3d_compute_workgroup_count(args->num_copies, + vkd3d_meta_get_descriptor_copy_workgroup_size()), 1, 1)); +} + static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue_transition_pool *pool, struct d3d12_device *device, const struct vkd3d_initial_transition *transitions, size_t count, VkCommandBuffer *vk_cmd_buffer, uint64_t *timeline_value) @@ -16626,12 +16630,14 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue const struct vkd3d_initial_transition *transition; VkCommandBufferBeginInfo begin_info; unsigned int command_index; + bool need_descriptor_copy; VkDependencyInfo dep_info; uint32_t need_transition; size_t i; pool->barriers_count = 0; pool->query_heaps_count = 0; + need_descriptor_copy = false; if (!count) { @@ -16662,35 +16668,16 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue break; case VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH: - { - /* Emulate what we intend to do in async compute. */ - uint32_t *dst = (void *)transition->descriptor_copy_batch.descriptor_buffer_va; - const struct d3d12_command_list_descriptor_copy_desc *src = - (const void *)transition->descriptor_copy_batch.host_buffer_va; - const struct d3d12_command_list_descriptor_copy_heap *heaps = - (const void *)transition->descriptor_copy_batch.host_meta_va; - VkDeviceAddress va; - unsigned int j; - - for (j = 0; j < transition->descriptor_copy_batch.num_copies; j++, src++) - { - if (src->src_offset < heaps[src->set_index].num_descriptors) - { - va = heaps[src->set_index].base_va + - src->src_offset * heaps[src->set_index].stride_words * sizeof(uint32_t); - memcpy(dst + src->dst_offset, (const void *)va, src->count * sizeof(uint32_t)); - } - } - + /* Deal with it later once we have figured out which command buffer to work on. */ + need_descriptor_copy = true; break; - } default: ERR("Unhandled transition type %u.\n", transition->type); } } - if (!pool->barriers_count && !pool->query_heaps_count) + if (!pool->barriers_count && !pool->query_heaps_count && !need_descriptor_copy) { *vk_cmd_buffer = VK_NULL_HANDLE; return; @@ -16714,10 +16701,16 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue dep_info.imageMemoryBarrierCount = pool->barriers_count; dep_info.pImageMemoryBarriers = pool->barriers; - VK_CALL(vkCmdPipelineBarrier2(pool->cmd[command_index], &dep_info)); + if (pool->barriers_count) + VK_CALL(vkCmdPipelineBarrier2(pool->cmd[command_index], &dep_info)); for (i = 0; i < pool->query_heaps_count; i++) d3d12_command_queue_init_query_heap(device, pool->cmd[command_index], pool->query_heaps[i]); + + for (i = 0; i < count; i++) + if (transitions[i].type == VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH) + d3d12_command_queue_copy_descriptor_batch(device, pool->cmd[command_index], &transitions[i].descriptor_copy_batch); + VK_CALL(vkEndCommandBuffer(pool->cmd[command_index])); *vk_cmd_buffer = pool->cmd[command_index]; diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c index 5e948a643f..6fe12fb6f1 100644 --- a/libs/vkd3d/meta.c +++ b/libs/vkd3d/meta.c @@ -1669,6 +1669,45 @@ static void vkd3d_sampler_feedback_ops_cleanup(struct vkd3d_sampler_feedback_res VK_CALL(vkDestroyPipeline(device->vk_device, sampler_feedback_ops->vk_pipelines[i], NULL)); } +static HRESULT vkd3d_descriptor_copy_ops_init(struct vkd3d_descriptor_copy_ops *copy_ops, + struct d3d12_device *device) +{ + VkPushConstantRange push_range; + VkResult vr; + + push_range.offset = 0; + push_range.size = sizeof(struct vkd3d_descriptor_copy_meta_args); + push_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + if ((vr = vkd3d_meta_create_pipeline_layout(device, + 0, NULL, + 1, &push_range, + ©_ops->vk_pipeline_layout))) + return hresult_from_vk_result(vr); + + /* Only called in ancillary command buffers. No need to consider descriptor buffers. */ + if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_copy_descriptors), + cs_copy_descriptors, copy_ops->vk_pipeline_layout, + NULL, false, ©_ops->vk_pipeline))) + return hresult_from_vk_result(vr); + + return S_OK; +} + +static void vkd3d_descriptor_copy_ops_cleanup(struct vkd3d_descriptor_copy_ops *copy_ops, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VK_CALL(vkDestroyPipelineLayout(device->vk_device, copy_ops->vk_pipeline_layout, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, copy_ops->vk_pipeline, NULL)); +} + +void vkd3d_meta_get_descriptor_copy_pipeline(struct vkd3d_meta_ops *meta_ops, + struct vkd3d_descriptor_copy_info *info) +{ + info->vk_pipeline = meta_ops->descriptor_copy.vk_pipeline; + info->vk_pipeline_layout = meta_ops->descriptor_copy.vk_pipeline_layout; +} + HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device) { HRESULT hr; @@ -1706,8 +1745,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device if (FAILED(hr = vkd3d_sampler_feedback_ops_init(&meta_ops->sampler_feedback, device))) goto fail_sampler_feedback; + if (FAILED(hr = vkd3d_descriptor_copy_ops_init(&meta_ops->descriptor_copy, device))) + goto fail_descriptor_copy; + return S_OK; +fail_descriptor_copy: + vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback, device); fail_sampler_feedback: vkd3d_dstorage_ops_cleanup(&meta_ops->dstorage, device); fail_dstorage_ops: @@ -1732,6 +1776,7 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device) { + vkd3d_descriptor_copy_ops_cleanup(&meta_ops->descriptor_copy, device); vkd3d_sampler_feedback_ops_cleanup(&meta_ops->sampler_feedback, device); vkd3d_dstorage_ops_cleanup(&meta_ops->dstorage, device); vkd3d_multi_dispatch_indirect_ops_cleanup(&meta_ops->multi_dispatch_indirect, device); diff --git a/libs/vkd3d/shaders/cs_copy_descriptors.comp b/libs/vkd3d/shaders/cs_copy_descriptors.comp index 1f4a43a99d..1905a70305 100644 --- a/libs/vkd3d/shaders/cs_copy_descriptors.comp +++ b/libs/vkd3d/shaders/cs_copy_descriptors.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_buffer_reference : require -layout(local_size_x = 64) in; +layout(local_size_x = 32) in; layout(buffer_reference_align = 4, buffer_reference, std430) readonly buffer SourceHeap { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 46a704c202..b4191d9cc1 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -2547,7 +2547,7 @@ enum vkd3d_initial_transition_type VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH, }; -struct d3d12_command_list_descriptor_copy_batch_meta_args +struct vkd3d_descriptor_copy_meta_args { VkDeviceAddress descriptor_buffer_va; VkDeviceAddress host_buffer_va; @@ -2566,7 +2566,7 @@ struct vkd3d_initial_transition bool perform_initial_transition; } resource; struct d3d12_query_heap *query_heap; - struct d3d12_command_list_descriptor_copy_batch_meta_args descriptor_copy_batch; + struct vkd3d_descriptor_copy_meta_args descriptor_copy_batch; }; }; @@ -4164,6 +4164,18 @@ struct vkd3d_sampler_feedback_resolve_ops VkPipeline vk_pipelines[VKD3D_SAMPLER_FEEDBACK_RESOLVE_COUNT]; }; +struct vkd3d_descriptor_copy_info +{ + VkPipeline vk_pipeline; + VkPipelineLayout vk_pipeline_layout; +}; + +struct vkd3d_descriptor_copy_ops +{ + VkPipelineLayout vk_pipeline_layout; + VkPipeline vk_pipeline; +}; + struct vkd3d_meta_ops { struct d3d12_device *device; @@ -4177,6 +4189,7 @@ struct vkd3d_meta_ops struct vkd3d_multi_dispatch_indirect_ops multi_dispatch_indirect; struct vkd3d_dstorage_ops dstorage; struct vkd3d_sampler_feedback_resolve_ops sampler_feedback; + struct vkd3d_descriptor_copy_ops descriptor_copy; }; HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device); @@ -4231,6 +4244,14 @@ static inline VkExtent3D vkd3d_meta_get_sampler_feedback_workgroup_size(void) return result; } +static inline uint32_t vkd3d_meta_get_descriptor_copy_workgroup_size(void) +{ + return 32; +} + +void vkd3d_meta_get_descriptor_copy_pipeline(struct vkd3d_meta_ops *meta_ops, + struct vkd3d_descriptor_copy_info *info); + enum vkd3d_time_domain_flag { VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u, From dcf9e20d36daf6ab6d290658b4b7511cbaffd306 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 14:56:32 +0100 Subject: [PATCH 23/27] Implement GPU side copy. --- libs/vkd3d/command.c | 24 ++++++++++++++++----- libs/vkd3d/device.c | 14 ++++++------ libs/vkd3d/meta.c | 9 +++++++- libs/vkd3d/shaders/cs_copy_descriptors.comp | 8 +++++++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 3318993ecc..2557ef9114 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6066,11 +6066,7 @@ static void d3d12_command_list_update_descriptor_buffers(struct d3d12_command_li for (i = 0; i < list->device->bindless_state.set_count; i++) { unsigned int buffer_index = list->device->bindless_state.vk_descriptor_buffer_indices[i]; - /*batch->heaps[i].base_va = global_buffers[buffer_index].address + list->descriptor_heap.buffers.vk_payload_offsets[i];*/ - /* HACK: Use CPU side copy for now. */ - heaps[i].base_va = (VkDeviceAddress) - ((uint8_t *)list->descriptor_heap.buffers.mapped[buffer_index] + - list->descriptor_heap.buffers.vk_payload_offsets[i]); + heaps[i].base_va = global_buffers[buffer_index].address + list->descriptor_heap.buffers.vk_payload_offsets[i]; heaps[i].num_descriptors = list->descriptor_heap.buffers.vk_descriptor_count_for_buffer_index[buffer_index]; heaps[i].stride_words = list->descriptor_heap.buffers.vk_descriptor_stride_words[i]; } @@ -16629,6 +16625,7 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct vkd3d_initial_transition *transition; VkCommandBufferBeginInfo begin_info; + VkMemoryBarrier2 vk_barrier; unsigned int command_index; bool need_descriptor_copy; VkDependencyInfo dep_info; @@ -16711,6 +16708,23 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue if (transitions[i].type == VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH) d3d12_command_queue_copy_descriptor_batch(device, pool->cmd[command_index], &transitions[i].descriptor_copy_batch); + if (need_descriptor_copy) + { + memset(&vk_barrier, 0, sizeof(vk_barrier)); + vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; + vk_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + vk_barrier.srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT; + vk_barrier.dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + vk_barrier.dstAccessMask = VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT; + + memset(&dep_info, 0, sizeof(dep_info)); + dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; + dep_info.memoryBarrierCount = 1; + dep_info.pMemoryBarriers = &vk_barrier; + + VK_CALL(vkCmdPipelineBarrier2(pool->cmd[command_index], &dep_info)); + } + VK_CALL(vkEndCommandBuffer(pool->cmd[command_index])); *vk_cmd_buffer = pool->cmd[command_index]; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index f22833cc86..027e13f8ff 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -8327,11 +8327,11 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_sampler_state_init(&device->sampler_state, device))) goto out_cleanup_view_map; - if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device))) + if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device))) goto out_cleanup_sampler_state; - if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device))) - goto out_cleanup_meta_ops; + if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device))) + goto out_cleanup_debug_ring; vkd3d_scratch_pool_init(device); @@ -8339,7 +8339,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_breadcrumb_tracer_init_barrier_hashes(&device->breadcrumb_tracer); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) if (FAILED(hr = vkd3d_breadcrumb_tracer_init(&device->breadcrumb_tracer, device))) - goto out_cleanup_debug_ring; + goto out_cleanup_meta_ops; #endif if (vkd3d_descriptor_debug_active_qa_checks()) @@ -8386,12 +8386,12 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device); -out_cleanup_debug_ring: +out_cleanup_meta_ops: vkd3d_breadcrumb_tracer_cleanup_barrier_hashes(&device->breadcrumb_tracer); #endif - vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); -out_cleanup_meta_ops: vkd3d_meta_ops_cleanup(&device->meta_ops, device); +out_cleanup_debug_ring: + vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); out_cleanup_sampler_state: vkd3d_sampler_state_cleanup(&device->sampler_state, device); out_cleanup_view_map: diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c index 6fe12fb6f1..b641e33b73 100644 --- a/libs/vkd3d/meta.c +++ b/libs/vkd3d/meta.c @@ -1672,8 +1672,10 @@ static void vkd3d_sampler_feedback_ops_cleanup(struct vkd3d_sampler_feedback_res static HRESULT vkd3d_descriptor_copy_ops_init(struct vkd3d_descriptor_copy_ops *copy_ops, struct d3d12_device *device) { + struct vkd3d_shader_debug_ring_spec_info debug_ring_info; VkPushConstantRange push_range; VkResult vr; + bool debug; push_range.offset = 0; push_range.size = sizeof(struct vkd3d_descriptor_copy_meta_args); @@ -1684,10 +1686,15 @@ static HRESULT vkd3d_descriptor_copy_ops_init(struct vkd3d_descriptor_copy_ops * ©_ops->vk_pipeline_layout))) return hresult_from_vk_result(vr); + debug = device->debug_ring.active; + + if (debug) + vkd3d_shader_debug_ring_init_spec_constant(device, &debug_ring_info, UINT64_MAX); + /* Only called in ancillary command buffers. No need to consider descriptor buffers. */ if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_copy_descriptors), cs_copy_descriptors, copy_ops->vk_pipeline_layout, - NULL, false, ©_ops->vk_pipeline))) + debug ? &debug_ring_info.spec_info : NULL, false, ©_ops->vk_pipeline))) return hresult_from_vk_result(vr); return S_OK; diff --git a/libs/vkd3d/shaders/cs_copy_descriptors.comp b/libs/vkd3d/shaders/cs_copy_descriptors.comp index 1905a70305..f4007e327c 100644 --- a/libs/vkd3d/shaders/cs_copy_descriptors.comp +++ b/libs/vkd3d/shaders/cs_copy_descriptors.comp @@ -1,5 +1,8 @@ #version 450 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +#extension GL_GOOGLE_include_directive : require +#include "../../../include/shader-debug/debug_channel.h" layout(local_size_x = 32) in; @@ -46,6 +49,7 @@ layout(push_constant, std430) uniform Registers void main() { + DEBUG_CHANNEL_INIT(gl_GlobalInvocationID); uint index = gl_GlobalInvocationID.x; if (index < registers.num_copies) { @@ -55,9 +59,13 @@ void main() uint set_index = bitfieldExtract(src_copy.dst_offset_set_index_count, 16, 8); uint word_count = bitfieldExtract(src_copy.dst_offset_set_index_count, 24, 8); + DEBUG_CHANNEL_MSG(int(dst_offset), int(set_index), int(word_count)); + BoundHeap heap_meta = registers.meta.heaps[set_index]; bool in_bounds = src_copy.src_offset < heap_meta.num_descriptors; + DEBUG_CHANNEL_MSG(int(src_copy.src_offset), int(heap_meta.num_descriptors)); + if (in_bounds) { uint src_offset = src_copy.src_offset * heap_meta.stride_words; From 97255aad6228dc86478d69a278283207f63292e6 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 15:06:11 +0100 Subject: [PATCH 24/27] Fix some warnings. --- libs/vkd3d/state.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index bf77f87708..5a350fea8a 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1600,9 +1600,13 @@ static HRESULT d3d12_root_signature_create_hoisted_descriptor_set_layout( return E_INVALIDARG; } - vkd3d_bindless_state_find_binding(&device->bindless_state, bindless_flags, &binding); - entry->set_index = binding.set; - entry->count = vkd3d_get_descriptor_size_for_type(root_signature->device, bindings[i].descriptorType) / sizeof(uint32_t); + if (vkd3d_bindless_state_find_binding(&device->bindless_state, bindless_flags, &binding)) + { + entry->set_index = binding.set; + entry->count = vkd3d_get_descriptor_size_for_type(root_signature->device, bindings[i].descriptorType) / sizeof(uint32_t); + } + else + return E_INVALIDARG; VK_CALL(vkGetDescriptorSetLayoutBindingOffsetEXT(device->vk_device, vk_set_layout, i, &desc_offset)); entry->dst_offset_words = (copy_template->descriptor_offsets[copy_template->num_hoist_sets] + desc_offset) / sizeof(uint32_t); @@ -2948,7 +2952,7 @@ static void vkd3d_report_pipeline_creation_feedback_results(const VkPipelineCrea static HRESULT d3d12_pipeline_state_create_hoisted_pipeline_layout(struct d3d12_pipeline_state *state) { const struct vkd3d_shader_meta *metas[2] = { NULL }; - VkShaderStageFlagBits stages[2]; + VkShaderStageFlagBits stages[2] = { 0 }; unsigned int i; if (state->pipeline_type == VKD3D_PIPELINE_TYPE_COMPUTE) From 7dddd0fc1935439cfe08274215634161af5c0386 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 15:28:16 +0100 Subject: [PATCH 25/27] Use default heap. --- libs/vkd3d/device.c | 2 +- libs/vkd3d/state.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 027e13f8ff..1f9c08d573 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2950,7 +2950,7 @@ static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, e assert(memory_types == ~0u); memset(&alloc_info, 0, sizeof(alloc_info)); - alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_UPLOAD; /* TODO: DEFAULT */ + alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; alloc_info.heap_desc.SizeInBytes = size; alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 5a350fea8a..cac7c1199b 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1629,7 +1629,10 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( HRESULT hr; if (first_set_count == 0 && second_set_count == 0) + { + INFO ("Hoisting 0 descriptors!\n"); return S_OK; + } copy_template->first_hoist_set_index = layout->num_set_layouts; @@ -1685,6 +1688,8 @@ HRESULT d3d12_root_signature_create_hoisted_descriptor_layout( copy_template->descriptor_offsets[0] = copy_template->descriptor_offsets[1]; } + INFO("Hoisting %u descriptors!\n", copy_template->num_entries); + return S_OK; } From ca9accf07adcc2f58b5a39e18d7eeeb9f3f53fff Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 23 Nov 2023 15:33:02 +0100 Subject: [PATCH 26/27] Add config flag for hoisting. --- include/vkd3d.h | 1 + libs/vkd3d/device.c | 1 + libs/vkd3d/state.c | 1 + 3 files changed, 3 insertions(+) diff --git a/include/vkd3d.h b/include/vkd3d.h index 381968e124..904c5b53e3 100644 --- a/include/vkd3d.h +++ b/include/vkd3d.h @@ -101,6 +101,7 @@ extern "C" { #define VKD3D_CONFIG_FLAG_DISABLE_UAV_COMPRESSION (1ull << 43) #define VKD3D_CONFIG_FLAG_DISABLE_DEPTH_COMPRESSION (1ull << 44) #define VKD3D_CONFIG_FLAG_DISABLE_COLOR_COMPRESSION (1ull << 45) +#define VKD3D_CONFIG_FLAG_DESCRIPTOR_HOISTING (1ull << 46) struct vkd3d_instance; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 1f9c08d573..f4935cb126 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -867,6 +867,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] = {"disable_uav_compression", VKD3D_CONFIG_FLAG_DISABLE_UAV_COMPRESSION}, {"disable_depth_compression", VKD3D_CONFIG_FLAG_DISABLE_DEPTH_COMPRESSION}, {"disable_color_compression", VKD3D_CONFIG_FLAG_DISABLE_COLOR_COMPRESSION}, + {"descriptor_hoisting", VKD3D_CONFIG_FLAG_DESCRIPTOR_HOISTING}, }; static void vkd3d_config_flags_init_once(void) diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index cac7c1199b..c99db202df 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -6250,6 +6250,7 @@ static bool vkd3d_bindless_supports_descriptor_buffer_hoisting(struct d3d12_devi { const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props; if (d3d12_device_uses_descriptor_buffers(device) && + (vkd3d_config_flags & VKD3D_CONFIG_FLAG_DESCRIPTOR_HOISTING) && (flags & VKD3D_BINDLESS_RAW_SSBO) && !(flags & VKD3D_SSBO_OFFSET_BUFFER)) { From 3c2afcdb4da36eac9e06c7bd9334e26bdd6dbe6f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 24 Nov 2023 16:22:01 +0100 Subject: [PATCH 27/27] Do initial transition on async queue when appropriate. --- libs/vkd3d/command.c | 163 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 22 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2557ef9114..27c6b428e3 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -16470,9 +16470,13 @@ static void d3d12_command_queue_signal_shared(struct d3d12_command_queue *comman struct d3d12_command_queue_transition_pool { VkCommandBuffer cmd[VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS]; + VkCommandBuffer async_cmd[VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS]; VkCommandPool pool; + VkCommandPool async_pool; VkSemaphore timeline; uint64_t timeline_value; + VkSemaphore async_timeline; + uint64_t async_timeline_value; VkImageMemoryBarrier2 *barriers; size_t barriers_size; @@ -16502,6 +16506,15 @@ static HRESULT d3d12_command_queue_transition_pool_init(struct d3d12_command_que if ((vr = VK_CALL(vkCreateCommandPool(queue->device->vk_device, &pool_info, NULL, &pool->pool)))) return hresult_from_vk_result(vr); + if (queue->device->memory_transfers.vkd3d_queue->vk_queue != queue->vkd3d_queue->vk_queue) + { + /* When we don't need to perform initializations in a serialized fashion, we can do the work on the + * memory transfer queue out of bounds instead. */ + pool_info.queueFamilyIndex = queue->device->memory_transfers.vkd3d_queue->vk_family_index; + if ((vr = VK_CALL(vkCreateCommandPool(queue->device->vk_device, &pool_info, NULL, &pool->async_pool)))) + return hresult_from_vk_result(vr); + } + alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; alloc_info.pNext = NULL; alloc_info.commandPool = pool->pool; @@ -16513,11 +16526,20 @@ static HRESULT d3d12_command_queue_transition_pool_init(struct d3d12_command_que if (FAILED(hr = vkd3d_create_timeline_semaphore(queue->device, 0, false, &pool->timeline))) return hr; + if (pool->async_pool) + { + alloc_info.commandPool = pool->async_pool; + if ((vr = VK_CALL(vkAllocateCommandBuffers(queue->device->vk_device, &alloc_info, pool->async_cmd)))) + return hresult_from_vk_result(vr); + if (FAILED(hr = vkd3d_create_timeline_semaphore(queue->device, 0, false, &pool->async_timeline))) + return hr; + } + return S_OK; } static void d3d12_command_queue_transition_pool_wait(struct d3d12_command_queue_transition_pool *pool, - struct d3d12_device *device, uint64_t value) + struct d3d12_device *device, VkSemaphore timeline, uint64_t value) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkSemaphoreWaitInfo wait_info; @@ -16526,7 +16548,7 @@ static void d3d12_command_queue_transition_pool_wait(struct d3d12_command_queue_ wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; wait_info.pNext = NULL; wait_info.flags = 0; - wait_info.pSemaphores = &pool->timeline; + wait_info.pSemaphores = &timeline; wait_info.semaphoreCount = 1; wait_info.pValues = &value; vr = VK_CALL(vkWaitSemaphores(device->vk_device, &wait_info, ~(uint64_t)0)); @@ -16537,9 +16559,12 @@ static void d3d12_command_queue_transition_pool_deinit(struct d3d12_command_queu struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - d3d12_command_queue_transition_pool_wait(pool, device, pool->timeline_value); + d3d12_command_queue_transition_pool_wait(pool, device, pool->timeline, pool->timeline_value); + d3d12_command_queue_transition_pool_wait(pool, device, pool->async_timeline, pool->async_timeline_value); VK_CALL(vkDestroyCommandPool(device->vk_device, pool->pool, NULL)); + VK_CALL(vkDestroyCommandPool(device->vk_device, pool->async_pool, NULL)); VK_CALL(vkDestroySemaphore(device->vk_device, pool->timeline, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, pool->async_timeline, NULL)); vkd3d_free(pool->barriers); vkd3d_free((void*)pool->query_heaps); } @@ -16618,14 +16643,57 @@ static void d3d12_command_queue_copy_descriptor_batch(struct d3d12_device *devic vkd3d_meta_get_descriptor_copy_workgroup_size()), 1, 1)); } +static void d3d12_command_queue_transition_pool_submit_async(struct d3d12_command_queue_transition_pool *pool, + struct d3d12_device *device, VkCommandBuffer vk_cmd_buffer) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreSubmitInfo async_semaphore; + VkCommandBufferSubmitInfo cmd; + VkSubmitInfo2 submit; + VkQueue vk_queue; + VkResult vr; + + vk_queue = vkd3d_queue_acquire(device->memory_transfers.vkd3d_queue); + if (!vk_queue) + { + ERR("Failed to acquire memory transfer queue.\n"); + return; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO; + cmd.commandBuffer = vk_cmd_buffer; + + memset(&async_semaphore, 0, sizeof(async_semaphore)); + async_semaphore.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; + async_semaphore.semaphore = pool->async_timeline; + async_semaphore.value = pool->async_timeline_value; + + memset(&submit, 0, sizeof(submit)); + submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2; + submit.pSignalSemaphoreInfos = &async_semaphore; + submit.signalSemaphoreInfoCount = 1; + submit.commandBufferInfoCount = 1; + submit.pCommandBufferInfos = &cmd; + vr = VK_CALL(vkQueueSubmit2(vk_queue, 1, &submit, NULL)); + vkd3d_queue_release(device->memory_transfers.vkd3d_queue); + if (vr < 0) + ERR("Failed to submit async transition, vr %d.\n", vr); + VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST); +} + static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue_transition_pool *pool, struct d3d12_device *device, const struct vkd3d_initial_transition *transitions, size_t count, - VkCommandBuffer *vk_cmd_buffer, uint64_t *timeline_value) + VkCommandBuffer *out_vk_cmd_buffer, VkSemaphoreSubmitInfo *out_semaphore) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct vkd3d_initial_transition *transition; VkCommandBufferBeginInfo begin_info; + uint64_t *target_timeline_value; + VkCommandBuffer vk_cmd_buffer; + VkSemaphore *target_timeline; VkMemoryBarrier2 vk_barrier; + bool need_synchronous_init; unsigned int command_index; bool need_descriptor_copy; VkDependencyInfo dep_info; @@ -16635,10 +16703,13 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue pool->barriers_count = 0; pool->query_heaps_count = 0; need_descriptor_copy = false; + need_synchronous_init = false; if (!count) { - *vk_cmd_buffer = VK_NULL_HANDLE; + *out_vk_cmd_buffer = VK_NULL_HANDLE; + out_semaphore->semaphore = VK_NULL_HANDLE; + out_semaphore->value = 0; return; } @@ -16656,7 +16727,14 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue 0, vkd3d_memory_order_relaxed); if (need_transition && transition->resource.perform_initial_transition) + { + /* If we're initializing committed resources, there's no need to do the transition in a synchronized way. + * Can punt that to async queue. For placed resources, we may have to do it synchronized due to aliasing rules + * and ExecuteCommandLists having implicit sync between them. */ + if (d3d12_resource_may_alias_other_resources(transition->resource.resource)) + need_synchronous_init = true; d3d12_command_queue_transition_pool_add_barrier(pool, transition->resource.resource); + } break; case VKD3D_INITIAL_TRANSITION_TYPE_QUERY_HEAP: @@ -16676,22 +16754,46 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue if (!pool->barriers_count && !pool->query_heaps_count && !need_descriptor_copy) { - *vk_cmd_buffer = VK_NULL_HANDLE; + *out_vk_cmd_buffer = VK_NULL_HANDLE; + out_semaphore->semaphore = VK_NULL_HANDLE; + out_semaphore->value = 0; return; } - pool->timeline_value++; - command_index = pool->timeline_value % VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS; + if (!pool->async_pool) + need_synchronous_init = true; + + if (need_synchronous_init) + { + target_timeline = &pool->timeline; + target_timeline_value = &pool->timeline_value; + } + else + { + target_timeline = &pool->async_timeline; + target_timeline_value = &pool->async_timeline_value; + } + + (*target_timeline_value)++; + command_index = *target_timeline_value % VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS; + + if (*target_timeline_value > VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS) + { + d3d12_command_queue_transition_pool_wait(pool, device, + *target_timeline, *target_timeline_value - VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS); + } - if (pool->timeline_value > VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS) - d3d12_command_queue_transition_pool_wait(pool, device, pool->timeline_value - VKD3D_COMMAND_QUEUE_NUM_TRANSITION_BUFFERS); + if (need_synchronous_init) + vk_cmd_buffer = pool->cmd[command_index]; + else + vk_cmd_buffer = pool->async_cmd[command_index]; begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; begin_info.pNext = NULL; begin_info.pInheritanceInfo = NULL; begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CALL(vkResetCommandBuffer(pool->cmd[command_index], 0)); - VK_CALL(vkBeginCommandBuffer(pool->cmd[command_index], &begin_info)); + VK_CALL(vkResetCommandBuffer(vk_cmd_buffer, 0)); + VK_CALL(vkBeginCommandBuffer(vk_cmd_buffer, &begin_info)); memset(&dep_info, 0, sizeof(dep_info)); dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; @@ -16699,16 +16801,16 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue dep_info.pImageMemoryBarriers = pool->barriers; if (pool->barriers_count) - VK_CALL(vkCmdPipelineBarrier2(pool->cmd[command_index], &dep_info)); + VK_CALL(vkCmdPipelineBarrier2(vk_cmd_buffer, &dep_info)); for (i = 0; i < pool->query_heaps_count; i++) - d3d12_command_queue_init_query_heap(device, pool->cmd[command_index], pool->query_heaps[i]); + d3d12_command_queue_init_query_heap(device, vk_cmd_buffer, pool->query_heaps[i]); for (i = 0; i < count; i++) if (transitions[i].type == VKD3D_INITIAL_TRANSITION_DESCRIPTOR_COPY_BATCH) - d3d12_command_queue_copy_descriptor_batch(device, pool->cmd[command_index], &transitions[i].descriptor_copy_batch); + d3d12_command_queue_copy_descriptor_batch(device, vk_cmd_buffer, &transitions[i].descriptor_copy_batch); - if (need_descriptor_copy) + if (need_descriptor_copy && need_synchronous_init) { memset(&vk_barrier, 0, sizeof(vk_barrier)); vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; @@ -16722,13 +16824,24 @@ static void d3d12_command_queue_transition_pool_build(struct d3d12_command_queue dep_info.memoryBarrierCount = 1; dep_info.pMemoryBarriers = &vk_barrier; - VK_CALL(vkCmdPipelineBarrier2(pool->cmd[command_index], &dep_info)); + VK_CALL(vkCmdPipelineBarrier2(vk_cmd_buffer, &dep_info)); } - VK_CALL(vkEndCommandBuffer(pool->cmd[command_index])); + VK_CALL(vkEndCommandBuffer(vk_cmd_buffer)); - *vk_cmd_buffer = pool->cmd[command_index]; - *timeline_value = pool->timeline_value; + if (need_synchronous_init) + { + *out_vk_cmd_buffer = vk_cmd_buffer; + out_semaphore->semaphore = pool->timeline; + out_semaphore->value = pool->timeline_value; + } + else + { + d3d12_command_queue_transition_pool_submit_async(pool, device, vk_cmd_buffer); + *out_vk_cmd_buffer = VK_NULL_HANDLE; + out_semaphore->semaphore = pool->async_timeline; + out_semaphore->value = pool->async_timeline_value; + } } static VkResult d3d12_command_queue_submit_split_locked(struct d3d12_device *device, @@ -16830,6 +16943,13 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu else { num_submits = 1; + + /* If we did a transition, but in the async queue, wait for it here. */ + if (transition_semaphore->value) + { + submit_desc[0].waitSemaphoreInfoCount = 1; + submit_desc[0].pWaitSemaphoreInfos = transition_semaphore; + } } if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) @@ -17360,14 +17480,13 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) memset(&transition_semaphore, 0, sizeof(transition_semaphore)); transition_semaphore.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; - transition_semaphore.semaphore = pool.timeline; transition_semaphore.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; d3d12_command_queue_transition_pool_build(&pool, queue->device, submission.execute.transitions, submission.execute.transition_count, &transition_cmd.commandBuffer, - &transition_semaphore.value); + &transition_semaphore); d3d12_command_queue_execute(queue, submission.execute.cmd, submission.execute.cmd_count,