From 64f3ef2ad7df86c60ef312dc8f19ec204031387d Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz
Date: Mon, 8 Apr 2024 15:39:55 -0400
Subject: [PATCH] lavapipe: EXT DGC

Acked-by: Konstantin Seurer
Part-of:
---
 src/gallium/frontends/lavapipe/lvp_device.c   |  16 +
 .../lavapipe/lvp_device_generated_commands.c  | 311 ++++++++++++++++++
 src/gallium/frontends/lavapipe/lvp_execute.c  | 284 +++++++++++++++-
 src/gallium/frontends/lavapipe/lvp_private.h  |  34 ++
 4 files changed, 644 insertions(+), 1 deletion(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
index 5299bd8110e..37cefdfc04e 100644
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -197,6 +197,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported =
    .EXT_dynamic_rendering_unused_attachments = true,
    .EXT_descriptor_buffer = true,
    .EXT_descriptor_indexing = true,
+   .EXT_device_generated_commands = true,
    .EXT_extended_dynamic_state = true,
    .EXT_extended_dynamic_state2 = true,
    .EXT_extended_dynamic_state3 = true,
@@ -643,6 +644,10 @@ lvp_get_features(const struct lvp_physical_device *pdevice,
       /* VK_NV_device_generated_commands */
       .deviceGeneratedCommandsNV = true,
 
+      /* VK_EXT_device_generated_commands */
+      .deviceGeneratedCommands = true,
+      .dynamicGeneratedPipelineLayout = true,
+
       /* VK_EXT_primitive_topology_list_restart */
       .primitiveTopologyListRestart = true,
       .primitiveTopologyPatchListRestart = true,
@@ -1067,6 +1072,17 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie
       .minSequencesIndexBufferOffsetAlignment = 4,
       .minIndirectCommandsBufferOffsetAlignment = 4,
 
+      /* VK_EXT_device_generated_commands */
+      .maxIndirectPipelineCount = 1<<12,
+      .maxIndirectShaderObjectCount = 1<<12,
+      .maxIndirectCommandsIndirectStride = 2048,
+      .supportedIndirectCommandsInputModes = VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT | VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
+      .supportedIndirectCommandsShaderStages = VK_SHADER_STAGE_ALL,
+      .supportedIndirectCommandsShaderStagesPipelineBinding = VK_SHADER_STAGE_ALL,
+      .supportedIndirectCommandsShaderStagesShaderBinding = VK_SHADER_STAGE_ALL,
+      .deviceGeneratedCommandsTransformFeedback = true,
+      .deviceGeneratedCommandsMultiDrawIndirectCount = true,
+
       /* VK_EXT_external_memory_host */
       .minImportedHostPointerAlignment = 4096,
 
diff --git a/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c b/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c
index 0ff0fd209ca..66f3f417d85 100644
--- a/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c
+++ b/src/gallium/frontends/lavapipe/lvp_device_generated_commands.c
@@ -144,3 +144,314 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
    pMemoryRequirements->memoryRequirements.alignment = 4;
    pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
 }
+
+/* Creates an indirect execution set: a table with room for
+ * maxShaderCount/maxPipelineCount handles, seeded with the initial
+ * shaders/pipeline from pCreateInfo. */
+VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectExecutionSetEXT(
+    VkDevice _device,
+    const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkIndirectExecutionSetEXT* pIndirectExecutionSet)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   bool is_shaders = pCreateInfo->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT;
+   size_t size = 0;
+   if (is_shaders) {
+      size += pCreateInfo->info.pShaderInfo->maxShaderCount;
+   } else {
+      size += pCreateInfo->info.pPipelineInfo->maxPipelineCount;
+   }
+   size *= sizeof(int64_t);
+   size += sizeof(struct lvp_indirect_execution_set);
+
+   struct lvp_indirect_execution_set *iset =
+      vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct lvp_indirect_execution_set),
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!iset)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk, &iset->base, VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);
+   iset->is_shaders = is_shaders;
+
+   if (is_shaders) {
+      for (unsigned i = 0; i < pCreateInfo->info.pShaderInfo->shaderCount; i++)
+         iset->array[i] = pCreateInfo->info.pShaderInfo->pInitialShaders[i];
+   } else {
+      iset->array[0] = pCreateInfo->info.pPipelineInfo->initialPipeline;
+   }
+
+   *pIndirectExecutionSet = lvp_indirect_execution_set_to_handle(iset);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectExecutionSetEXT(
+    VkDevice _device,
+    VkIndirectExecutionSetEXT indirectExecutionSet,
+    const VkAllocationCallbacks* pAllocator)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);
+
+   if (!iset)
+      return;
+
+   vk_object_base_finish(&iset->base);
+   vk_free2(&device->vk.alloc, pAllocator, iset);
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_UpdateIndirectExecutionSetPipelineEXT(
+    VkDevice device,
+    VkIndirectExecutionSetEXT indirectExecutionSet,
+    uint32_t executionSetWriteCount,
+    const VkWriteIndirectExecutionSetPipelineEXT* pExecutionSetWrites)
+{
+   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);
+
+   assert(!iset->is_shaders);
+   for (unsigned i = 0; i < executionSetWriteCount; i++) {
+      iset->array[pExecutionSetWrites[i].index] = pExecutionSetWrites[i].pipeline;
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_UpdateIndirectExecutionSetShaderEXT(
+    VkDevice device,
+    VkIndirectExecutionSetEXT indirectExecutionSet,
+    uint32_t executionSetWriteCount,
+    const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites)
+{
+   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);
+
+   assert(iset->is_shaders);
+   for (unsigned i = 0; i < executionSetWriteCount; i++) {
+      iset->array[pExecutionSetWrites[i].index] = pExecutionSetWrites[i].shader;
+   }
+}
+
+/* Size of the per-token info struct that the layout deep-copies for a
+ * given token type; 0 for token types that carry no info struct. */
+static size_t
+get_token_info_size(VkIndirectCommandsTokenTypeEXT type)
+{
+   switch (type) {
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
+      return sizeof(VkIndirectCommandsVertexBufferTokenEXT);
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
+      return sizeof(VkIndirectCommandsPushConstantTokenEXT);
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
+      return sizeof(VkIndirectCommandsIndexBufferTokenEXT);
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
+      return sizeof(VkIndirectCommandsExecutionSetTokenEXT);
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
+      return 0;
+   default: break;
+   }
+   unreachable("unknown token type");
+}
+
+/* Creates a DGC layout, copying the token array and each token's info
+ * struct into storage that trails the layout object. */
+VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectCommandsLayoutEXT(
+    VkDevice _device,
+    const VkIndirectCommandsLayoutCreateInfoEXT* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkIndirectCommandsLayoutEXT* pIndirectCommandsLayout)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   struct lvp_indirect_command_layout_ext *elayout;
+   size_t token_size = pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenEXT);
+
+   for (unsigned i = 0; i < pCreateInfo->tokenCount; i++) {
+      const VkIndirectCommandsLayoutTokenEXT *token = &pCreateInfo->pTokens[i];
+      token_size += get_token_info_size(token->type);
+   }
+
+   elayout = vk_indirect_command_layout_create(&device->vk, pCreateInfo, pAllocator, sizeof(struct lvp_indirect_command_layout_ext) + token_size);
+   if (!elayout)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   enum lvp_indirect_layout_type type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW;
+
+   for (unsigned i = 0; i < pCreateInfo->tokenCount; i++) {
+      const VkIndirectCommandsLayoutTokenEXT *token = &pCreateInfo->pTokens[i];
+      switch (token->type) {
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
+         type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW;
+         break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
+         type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT;
+         break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
+         type = LVP_INDIRECT_COMMAND_LAYOUT_DISPATCH;
+         break;
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
+         type = LVP_INDIRECT_COMMAND_LAYOUT_RAYS;
+         break;
+      default: break;
+      }
+   }
+   elayout->type = type;
+
+   /* tokens are the last member of the struct */
+   size_t tokens_offset = sizeof(struct lvp_indirect_command_layout_ext) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenEXT);
+   typed_memcpy(elayout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);
+   uint8_t *ptr = ((uint8_t *)elayout) + tokens_offset;
+   /* after the tokens comes the token data */
+   for (unsigned i = 0; i < pCreateInfo->tokenCount; i++) {
+      const VkIndirectCommandsLayoutTokenEXT *token = &pCreateInfo->pTokens[i];
+      size_t tsize = get_token_info_size(token->type);
+      if (tsize) {
+         elayout->tokens[i].data.pPushConstant = (void*)ptr;
+         memcpy(ptr, token->data.pPushConstant, tsize);
+      }
+      ptr += tsize;
+   }
+
+   *pIndirectCommandsLayout = lvp_indirect_command_layout_ext_to_handle(elayout);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectCommandsLayoutEXT(
+    VkDevice _device,
+    VkIndirectCommandsLayoutEXT indirectCommandsLayout,
+    const VkAllocationCallbacks* pAllocator)
+{
+   LVP_FROM_HANDLE(lvp_device, device, _device);
+   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, indirectCommandsLayout);
+
+   if (!elayout)
+      return;
+
+   vk_indirect_command_layout_destroy(&device->vk, pAllocator, &elayout->vk);
+}
+
+
+/* Maps a DGC token to the vk_cmd_queue command type it is lowered to. */
+enum vk_cmd_type
+lvp_ext_dgc_token_to_cmd_type(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token)
+{
+   switch (token->type) {
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
+      return VK_CMD_BIND_VERTEX_BUFFERS2;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
+      return VK_CMD_PUSH_CONSTANTS2_KHR;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
+      return VK_CMD_BIND_INDEX_BUFFER2_KHR;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
+      return elayout->vk.is_shaders ? VK_CMD_BIND_SHADERS_EXT : VK_CMD_BIND_PIPELINE;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
+      return VK_CMD_DRAW_INDEXED;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
+      return VK_CMD_DRAW;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
+      return VK_CMD_DRAW_INDEXED_INDIRECT;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
+      return VK_CMD_DRAW_INDIRECT;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
+      return VK_CMD_DISPATCH;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
+      return VK_CMD_TRACE_RAYS_KHR;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
+      unreachable("unsupported NV mesh");
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
+      return VK_CMD_DRAW_MESH_TASKS_EXT;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
+      return VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT;
+   default:
+      unreachable("unknown token type");
+   }
+   return UINT32_MAX;
+}
+
+/* Bytes one token occupies in the preprocess buffer: the
+ * vk_cmd_queue_type_sizes[] value plus the trailing data its command uses. */
+size_t
+lvp_ext_dgc_token_size(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token)
+{
+   UNUSED struct vk_cmd_queue_entry *cmd;
+   enum vk_cmd_type type = lvp_ext_dgc_token_to_cmd_type(elayout, token);
+   size_t size = vk_cmd_queue_type_sizes[type];
+   if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT || token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT) {
+      size += sizeof(*cmd->u.push_constants2_khr.push_constants_info);
+      size += token->data.pPushConstant->updateRange.size;
+      return size;
+   }
+   if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT) {
+      /* special case: switch between pipelines/shaders */
+      /* CmdBindShaders has 2 dynamically sized arrays */
+      if (elayout->vk.is_shaders)
+         size += sizeof(int64_t) * util_bitcount(token->data.pExecutionSet->shaderStages) * 2;
+      return size;
+   }
+
+   if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT)
+      return size + sizeof(VkStridedDeviceAddressRegionKHR) * 4;
+
+   switch (token->type) {
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
+      size += sizeof(*cmd->u.bind_vertex_buffers.buffers);
+      size += sizeof(*cmd->u.bind_vertex_buffers.offsets);
+      size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
+      break;
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
+   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
+      break;
+   default:
+      unreachable("unknown type!");
+   }
+   return size;
+}
+
+/* Reports the preprocess buffer size: the list head plus all per-token
+ * sizes (plus extra entries for draw layouts), times maxSequenceCount. */
+VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsEXT(
+    VkDevice device,
+    const VkGeneratedCommandsMemoryRequirementsInfoEXT* pInfo,
+    VkMemoryRequirements2* pMemoryRequirements)
+{
+   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, pInfo->indirectCommandsLayout);
+
+   size_t size = sizeof(struct list_head);
+
+   for (unsigned i = 0; i < elayout->vk.token_count; i++) {
+      const VkIndirectCommandsLayoutTokenEXT *token = &elayout->tokens[i];
+      size += lvp_ext_dgc_token_size(elayout, token);
+   }
+   if (elayout->type == LVP_INDIRECT_COMMAND_LAYOUT_DRAW || elayout->type == LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT)
+      /* set/unset indirect draw offset */
+      size += sizeof(struct vk_cmd_queue_entry) * (pInfo->maxSequenceCount + 1);
+
+   size *= pInfo->maxSequenceCount;
+
+   pMemoryRequirements->memoryRequirements.memoryTypeBits = 1;
+   pMemoryRequirements->memoryRequirements.alignment = 4;
+   pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
+}
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index e6531896ce9..c7d7bd04bcf 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -50,6 +50,7 @@
 #include "vk_cmd_enqueue_entrypoints.h"
 #include "vk_descriptor_update_template.h"
 #include "vk_util.h"
+#include "vk_enum_to_str.h"
 
 #define VK_PROTOTYPES
 #include <vulkan/vulkan.h>
@@ -4155,6 +4156,279 @@ handle_execute_generated_commands(struct vk_cmd_queue_entry *cmd, struct renderi
    state->pctx->buffer_unmap(state->pctx, pmap);
 }
 
+/* Lowers one DGC sequence into vk_cmd_queue entries written at pbuf and
+ * linked onto list.
+ *
+ * max_size is the number of bytes available at pbuf; stream is the start of
+ * the indirect stream and seq selects the sequence within it.
+ * Returns the number of bytes written; aborts if max_size would be exceeded. */
+static size_t
+process_sequence_ext(struct rendering_state *state,
+                     struct lvp_indirect_execution_set *iset, struct lvp_indirect_command_layout_ext *elayout,
+                     struct list_head *list, uint8_t *pbuf, size_t max_size,
+                     uint8_t *stream, uint32_t seq, uint32_t maxDrawCount,
+                     bool print_cmds)
+{
+   size_t size = 0;
+   assert(elayout->vk.token_count);
+   for (uint32_t t = 0; t < elayout->vk.token_count; t++){
+      const VkIndirectCommandsLayoutTokenEXT *token = &elayout->tokens[t];
+      size_t offset = (size_t)elayout->vk.stride * seq + token->offset;
+      void *input = stream + offset;
+
+      /* validate the remaining space before the first store into pbuf */
+      if (max_size < size + lvp_ext_dgc_token_size(elayout, token))
+         abort();
+
+      struct vk_cmd_queue_entry *cmd = (struct vk_cmd_queue_entry*)(pbuf + size);
+      cmd->type = lvp_ext_dgc_token_to_cmd_type(elayout, token);
+      size_t cmd_size = vk_cmd_queue_type_sizes[cmd->type];
+      uint8_t *cmdptr = (void*)(pbuf + size + cmd_size);
+
+      if (print_cmds)
+         fprintf(stderr, "DGC %s\n", vk_IndirectCommandsTokenTypeEXT_to_str(token->type));
+      switch (token->type) {
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT: {
+         uint32_t *data = input;
+         const VkIndirectCommandsExecutionSetTokenEXT *info = token->data.pExecutionSet;
+         if (info->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) {
+            cmd->u.bind_pipeline.pipeline_bind_point = lvp_pipeline_types_from_shader_stages(info->shaderStages);
+            cmd->u.bind_pipeline.pipeline = iset->array[*data];
+            /* validate */
+            lvp_pipeline_from_handle(cmd->u.bind_pipeline.pipeline);
+
+            assert(cmd->u.bind_pipeline.pipeline && "cannot bind null pipeline!");
+         } else {
+            unsigned count = util_bitcount(info->shaderStages);
+            cmd->u.bind_shaders_ext.stage_count = count;
+            cmd->u.bind_shaders_ext.stages = (void*)cmdptr;
+            int i = 0;
+            u_foreach_bit(stage, info->shaderStages) {
+               cmd->u.bind_shaders_ext.stages[i] = BITFIELD_BIT(stage);
+               assert(cmd->u.bind_shaders_ext.stages[i] && "cannot bind null shader stage!");
+               i++;
+            }
+            cmd->u.bind_shaders_ext.shaders = (void*)(cmdptr + sizeof(int64_t) * count);
+            for (unsigned i = 0; i < count; i++) {
+               cmd->u.bind_shaders_ext.shaders[i] = iset->array[data[i]];
+               if (cmd->u.bind_shaders_ext.shaders[i])
+                  lvp_shader_from_handle(cmd->u.bind_shaders_ext.shaders[i]);
+            }
+         }
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: {
+         uint32_t *data = input;
+         const VkIndirectCommandsPushConstantTokenEXT *info = token->data.pPushConstant;
+         cmd->u.push_constants2_khr.push_constants_info = (void*)cmdptr;
+         VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
+         pci->layout = elayout->vk.layout;
+         pci->stageFlags = VK_SHADER_STAGE_ALL;
+         pci->offset = info->updateRange.offset;
+         pci->size = info->updateRange.size;
+         pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR));
+         if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT)
+            memcpy((void*)pci->pValues, data, info->updateRange.size);
+         else
+            memcpy((void*)pci->pValues, &seq, info->updateRange.size);
+
+         break;
+      }
+/* these are the DXGI format values to avoid needing the full header */
+#define DXGI_FORMAT_R32_UINT 42
+#define DXGI_FORMAT_R16_UINT 57
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT: {
+         const VkIndirectCommandsIndexBufferTokenEXT *info = token->data.pIndexBuffer;
+         VkBindIndexBufferIndirectCommandEXT *data = input;
+         cmd->u.bind_index_buffer2_khr.offset = 0;
+         if (data->bufferAddress)
+            cmd->u.bind_index_buffer2_khr.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_index_buffer2_khr.offset);
+         else
+            cmd->u.bind_index_buffer2_khr.buffer = VK_NULL_HANDLE;
+         if (info->mode == VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT) {
+            cmd->u.bind_index_buffer2_khr.index_type = data->indexType;
+         } else {
+            switch ((int)data->indexType) {
+            case DXGI_FORMAT_R32_UINT:
+               cmd->u.bind_index_buffer2_khr.index_type = VK_INDEX_TYPE_UINT32;
+               break;
+            case DXGI_FORMAT_R16_UINT:
+               cmd->u.bind_index_buffer2_khr.index_type = VK_INDEX_TYPE_UINT16;
+               break;
+            default:
+               unreachable("unknown DXGI index type!");
+            }
+         }
+         cmd->u.bind_index_buffer2_khr.size = data->size;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT: {
+         VkBindVertexBufferIndirectCommandEXT *data = input;
+         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets);
+         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
+         if (max_size < size + cmd_size)
+            abort();
+
+         cmd->u.bind_vertex_buffers2.first_binding = token->data.pVertexBuffer->vertexBindingUnit;
+         cmd->u.bind_vertex_buffers2.binding_count = 1;
+
+         cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
+         uint32_t alloc_offset = sizeof(*cmd->u.bind_vertex_buffers2.buffers);
+
+         cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + alloc_offset);
+         alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.offsets);
+
+         cmd->u.bind_vertex_buffers2.sizes = (void*)(cmdptr + alloc_offset);
+         alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.sizes);
+
+         cmd->u.bind_vertex_buffers2.offsets[0] = 0;
+         cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
+         cmd->u.bind_vertex_buffers2.sizes[0] = data->size;
+
+         cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + alloc_offset);
+         cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT: {
+         VkDispatchIndirectCommand *data = input;
+         memcpy(&cmd->u.dispatch, data, sizeof(VkDispatchIndirectCommand));
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT: {
+         VkDrawIndexedIndirectCommand *data = input;
+         memcpy(&cmd->u.draw_indexed, data, sizeof(VkDrawIndexedIndirectCommand));
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT: {
+         VkDrawIndirectCommand *data = input;
+         memcpy(&cmd->u.draw, data, sizeof(VkDrawIndirectCommand));
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT: {
+         VkDrawIndirectCountIndirectCommandEXT *data = input;
+
+         cmd->u.draw_indexed_indirect.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_indexed_indirect.offset);
+         cmd->u.draw_indexed_indirect.draw_count = MIN2(data->commandCount, maxDrawCount);
+         cmd->u.draw_indexed_indirect.stride = data->stride;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT: {
+         VkDrawIndirectCountIndirectCommandEXT *data = input;
+
+         cmd->u.draw_indirect.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_indirect.offset);
+         cmd->u.draw_indirect.draw_count = MIN2(data->commandCount, maxDrawCount);
+         cmd->u.draw_indirect.stride = data->stride;
+         break;
+      }
+      // only available if VK_EXT_mesh_shader is supported
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT: {
+         VkDrawMeshTasksIndirectCommandEXT *data = input;
+         memcpy(&cmd->u.draw_mesh_tasks_ext, data, sizeof(VkDrawMeshTasksIndirectCommandEXT));
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT: {
+         VkDrawIndirectCountIndirectCommandEXT *data = input;
+
+         cmd->u.draw_mesh_tasks_indirect_ext.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_mesh_tasks_indirect_ext.offset);
+         cmd->u.draw_mesh_tasks_indirect_ext.draw_count = MIN2(data->commandCount, maxDrawCount);
+         cmd->u.draw_mesh_tasks_indirect_ext.stride = data->stride;
+         break;
+      }
+      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT: {
+         VkTraceRaysIndirectCommand2KHR *data = input;
+         VkStridedDeviceAddressRegionKHR *sbts = (void*)cmdptr;
+
+         cmd->u.trace_rays_khr.raygen_shader_binding_table = &sbts[0];
+         cmd->u.trace_rays_khr.raygen_shader_binding_table->deviceAddress = data->raygenShaderRecordAddress;
+         cmd->u.trace_rays_khr.raygen_shader_binding_table->stride = data->raygenShaderRecordSize;
+         cmd->u.trace_rays_khr.raygen_shader_binding_table->size = data->raygenShaderRecordSize;
+
+         cmd->u.trace_rays_khr.miss_shader_binding_table = &sbts[1];
+         cmd->u.trace_rays_khr.miss_shader_binding_table->deviceAddress = data->missShaderBindingTableAddress;
+         cmd->u.trace_rays_khr.miss_shader_binding_table->stride = data->missShaderBindingTableStride;
+         cmd->u.trace_rays_khr.miss_shader_binding_table->size = data->missShaderBindingTableSize;
+
+         cmd->u.trace_rays_khr.hit_shader_binding_table = &sbts[2];
+         cmd->u.trace_rays_khr.hit_shader_binding_table->deviceAddress = data->hitShaderBindingTableAddress;
+         cmd->u.trace_rays_khr.hit_shader_binding_table->stride = data->hitShaderBindingTableStride;
+         cmd->u.trace_rays_khr.hit_shader_binding_table->size = data->hitShaderBindingTableSize;
+
+         cmd->u.trace_rays_khr.callable_shader_binding_table = &sbts[3];
+         cmd->u.trace_rays_khr.callable_shader_binding_table->deviceAddress = data->callableShaderBindingTableAddress;
+         cmd->u.trace_rays_khr.callable_shader_binding_table->stride = data->callableShaderBindingTableStride;
+         cmd->u.trace_rays_khr.callable_shader_binding_table->size = data->callableShaderBindingTableSize;
+
+         cmd->u.trace_rays_khr.width = data->width;
+         cmd->u.trace_rays_khr.height = data->height;
+         cmd->u.trace_rays_khr.depth = data->depth;
+
+         break;
+      }
+      default:
+         unreachable("unknown token type");
+         break;
+      }
+      size += lvp_ext_dgc_token_size(elayout, token);
+      list_addtail(&cmd->cmd_link, list);
+   }
+   return size;
+}
+
+/* Builds the command list for a generated-commands invocation inside the
+ * application-provided preprocess buffer (list head first, then entries). */
+static void
+handle_preprocess_generated_commands_ext(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
+{
+   VkGeneratedCommandsInfoEXT *pre = cmd->u.preprocess_generated_commands_ext.generated_commands_info;
+   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, pre->indirectCommandsLayout);
+   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, pre->indirectExecutionSet);
+
+   unsigned seq_count = pre->maxSequenceCount;
+   if (pre->sequenceCountAddress) {
+      uint32_t *count = (void*)(uintptr_t)pre->sequenceCountAddress;
+      seq_count = MIN2(seq_count, *count);
+   }
+
+   struct list_head *list = (void*)(uintptr_t)pre->preprocessAddress;
+   size_t size = sizeof(struct list_head);
+   size_t max_size = pre->preprocessSize;
+   if (size > max_size)
+      abort();
+   list_inithead(list);
+
+   size_t offset = size;
+   uint8_t *p = (void*)(uintptr_t)pre->preprocessAddress;
+   for (unsigned i = 0; i < seq_count; i++) {
+      /* each sequence may only use what is left of the preprocess buffer */
+      offset += process_sequence_ext(state, iset, elayout, list, p + offset, max_size - offset, (void*)(uintptr_t)pre->indirectAddress, i, pre->maxDrawCount, print_cmds);
+      assert(offset);
+   }
+
+   /* vk_cmd_queue will copy the binary and break the list, so null the tail pointer */
+   list->prev->next = NULL;
+}
+
+/* Executes the preprocessed command list, running the preprocess step
+ * first when the command is not flagged as already preprocessed. */
+static void
+handle_execute_generated_commands_ext(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
+{
+   VkGeneratedCommandsInfoEXT *gen = cmd->u.execute_generated_commands_ext.generated_commands_info;
+   struct vk_cmd_execute_generated_commands_ext *exec = &cmd->u.execute_generated_commands_ext;
+   if (!exec->is_preprocessed) {
+      struct vk_cmd_queue_entry pre;
+      pre.u.preprocess_generated_commands_ext.generated_commands_info = exec->generated_commands_info;
+      handle_preprocess_generated_commands_ext(&pre, state, print_cmds);
+   }
+   uint8_t *p = (void*)(uintptr_t)gen->preprocessAddress;
+   struct list_head *list = (void*)p;
+
+   struct vk_cmd_queue_entry *exec_cmd = list_first_entry(list, struct vk_cmd_queue_entry, cmd_link);
+   if (exec_cmd)
+      lvp_execute_cmd_buffer(list, state, print_cmds);
+}
+
 static void
 handle_descriptor_buffers(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
 {
@@ -4788,6 +5062,8 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
    ENQUEUE_CMD(CmdBindPipelineShaderGroupNV)
    ENQUEUE_CMD(CmdPreprocessGeneratedCommandsNV)
    ENQUEUE_CMD(CmdExecuteGeneratedCommandsNV)
+   ENQUEUE_CMD(CmdPreprocessGeneratedCommandsEXT)
+   ENQUEUE_CMD(CmdExecuteGeneratedCommandsEXT)
 
 #ifdef VK_ENABLE_BETA_EXTENSIONS
    ENQUEUE_CMD(CmdInitializeGraphScratchMemoryAMDX)
@@ -4823,7 +5099,7 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
    LIST_FOR_EACH_ENTRY(cmd, cmds, cmd_link) {
       if (print_cmds)
          fprintf(stderr, "%s\n", vk_cmd_queue_type_names[cmd->type]);
-      switch (cmd->type) {
+      switch ((unsigned)cmd->type) {
       case VK_CMD_BIND_PIPELINE:
          handle_pipeline(cmd, state);
          break;
@@ -5151,6 +5427,12 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
       case VK_CMD_EXECUTE_GENERATED_COMMANDS_NV:
          handle_execute_generated_commands(cmd, state, print_cmds);
          break;
+      case VK_CMD_PREPROCESS_GENERATED_COMMANDS_EXT:
+         handle_preprocess_generated_commands_ext(cmd, state, print_cmds);
+         break;
+      case VK_CMD_EXECUTE_GENERATED_COMMANDS_EXT:
+         handle_execute_generated_commands_ext(cmd, state, print_cmds);
+         break;
       case VK_CMD_BIND_DESCRIPTOR_BUFFERS_EXT:
          handle_descriptor_buffers(cmd, state);
          break;
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 923a69c46e4..388ae9fed68 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -69,6 +69,7 @@ typedef uint32_t xcb_window_t;
 #include "vk_buffer.h"
 #include "vk_buffer_view.h"
 #include "vk_device.h"
+#include "vk_device_generated_commands.h"
 #include "vk_instance.h"
 #include "vk_image.h"
 #include "vk_log.h"
@@ -638,6 +639,31 @@ struct lvp_indirect_command_layout_nv {
    VkIndirectCommandsLayoutTokenNV tokens[0];
 };
 
+/* handle table indexed by EXECUTION_SET tokens; array trails the object */
+struct lvp_indirect_execution_set {
+   struct vk_object_base base;
+   bool is_shaders;
+#if VK_USE_64_BIT_PTR_DEFINES
+   void *array[0];
+#else
+   uint64_t array[0];
+#endif
+};
+
+enum lvp_indirect_layout_type {
+   LVP_INDIRECT_COMMAND_LAYOUT_DRAW,
+   LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT,
+   LVP_INDIRECT_COMMAND_LAYOUT_DISPATCH,
+   LVP_INDIRECT_COMMAND_LAYOUT_RAYS,
+};
+
+/* EXT DGC layout; tokens (and their copied info structs) trail the object */
+struct lvp_indirect_command_layout_ext {
+   struct vk_indirect_command_layout vk;
+   enum lvp_indirect_layout_type type;
+   VkIndirectCommandsLayoutTokenEXT tokens[0];
+};
+
 extern const struct vk_command_buffer_ops lvp_cmd_buffer_ops;
 
 static inline const struct lvp_descriptor_set_layout *
@@ -695,6 +721,10 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_sampler, vk.base, VkSampler,
                                VK_OBJECT_TYPE_SAMPLER)
 VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout_nv, base, VkIndirectCommandsLayoutNV,
                                VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
+VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout_ext, vk.base, VkIndirectCommandsLayoutEXT,
+                               VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_execution_set, base, VkIndirectExecutionSetEXT,
+                               VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT)
 
 void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp);
 
@@ -795,6 +825,10 @@ lvp_create_ahb_memory(struct lvp_device *device, struct lvp_device_memory *mem,
                       const VkMemoryAllocateInfo *pAllocateInfo);
 #endif
 
+enum vk_cmd_type
+lvp_ext_dgc_token_to_cmd_type(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token);
+size_t
+lvp_ext_dgc_token_size(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token);
 #ifdef __cplusplus
 }
 #endif