lavapipe: EXT DGC

Acked-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31386>
This commit is contained in:
Mike Blumenkrantz
2024-04-08 15:39:55 -04:00
committed by Marge Bot
parent 8097a8e81e
commit 64f3ef2ad7
4 changed files with 618 additions and 1 deletions
@@ -197,6 +197,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported =
.EXT_dynamic_rendering_unused_attachments = true,
.EXT_descriptor_buffer = true,
.EXT_descriptor_indexing = true,
.EXT_device_generated_commands = true,
.EXT_extended_dynamic_state = true,
.EXT_extended_dynamic_state2 = true,
.EXT_extended_dynamic_state3 = true,
@@ -643,6 +644,10 @@ lvp_get_features(const struct lvp_physical_device *pdevice,
/* VK_NV_device_generated_commands */
.deviceGeneratedCommandsNV = true,
/* VK_EXT_device_generated_commands */
.deviceGeneratedCommands = true,
.dynamicGeneratedPipelineLayout = true,
/* VK_EXT_primitive_topology_list_restart */
.primitiveTopologyListRestart = true,
.primitiveTopologyPatchListRestart = true,
@@ -1067,6 +1072,17 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie
.minSequencesIndexBufferOffsetAlignment = 4,
.minIndirectCommandsBufferOffsetAlignment = 4,
/* VK_EXT_device_generated_commands */
.maxIndirectPipelineCount = 1<<12,
.maxIndirectShaderObjectCount = 1<<12,
.maxIndirectCommandsIndirectStride = 2048,
.supportedIndirectCommandsInputModes = VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT | VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
.supportedIndirectCommandsShaderStages = VK_SHADER_STAGE_ALL,
.supportedIndirectCommandsShaderStagesPipelineBinding = VK_SHADER_STAGE_ALL,
.supportedIndirectCommandsShaderStagesShaderBinding = VK_SHADER_STAGE_ALL,
.deviceGeneratedCommandsTransformFeedback = true,
.deviceGeneratedCommandsMultiDrawIndirectCount = true,
/* VK_EXT_external_memory_host */
.minImportedHostPointerAlignment = 4096,
@@ -144,3 +144,302 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
pMemoryRequirements->memoryRequirements.alignment = 4;
pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
}
/**
 * Create an indirect execution set.
 *
 * The object is allocated with one 64-bit slot per pipeline (maxPipelineCount)
 * or shader object (maxShaderCount) directly behind the header, zero-filled,
 * and then seeded with the initial pipeline / initial shaders from pCreateInfo.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails, VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectExecutionSetEXT(
    VkDevice                                    _device,
    const VkIndirectExecutionSetCreateInfoEXT*  pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkIndirectExecutionSetEXT*                  pIndirectExecutionSet)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   const bool is_shaders = pCreateInfo->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT;

   /* number of handle slots trailing the struct */
   const size_t slot_count = is_shaders ? pCreateInfo->info.pShaderInfo->maxShaderCount
                                        : pCreateInfo->info.pPipelineInfo->maxPipelineCount;
   const size_t alloc_size = slot_count * sizeof(int64_t) + sizeof(struct lvp_indirect_execution_set);

   struct lvp_indirect_execution_set *iset =
      vk_zalloc2(&device->vk.alloc, pAllocator, alloc_size, alignof(struct lvp_indirect_execution_set),
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (iset == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &iset->base, VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);
   iset->is_shaders = is_shaders;

   if (!is_shaders) {
      /* a pipeline set starts with exactly one entry */
      iset->array[0] = pCreateInfo->info.pPipelineInfo->initialPipeline;
   } else {
      const unsigned initial_count = pCreateInfo->info.pShaderInfo->shaderCount;
      for (unsigned s = 0; s < initial_count; s++)
         iset->array[s] = pCreateInfo->info.pShaderInfo->pInitialShaders[s];
   }

   *pIndirectExecutionSet = lvp_indirect_execution_set_to_handle(iset);
   return VK_SUCCESS;
}
/**
 * Destroy an indirect execution set. A null handle is ignored.
 */
VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectExecutionSetEXT(
    VkDevice                                    _device,
    VkIndirectExecutionSetEXT                   indirectExecutionSet,
    const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);

   if (iset != NULL) {
      vk_object_base_finish(&iset->base);
      vk_free2(&device->vk.alloc, pAllocator, iset);
   }
}
/**
 * Store pipelines into a pipeline-type execution set.
 *
 * Each write replaces the slot at its `index` with its `pipeline` handle.
 * Indices are not range-checked here.
 */
VKAPI_ATTR void VKAPI_CALL lvp_UpdateIndirectExecutionSetPipelineEXT(
    VkDevice                                    device,
    VkIndirectExecutionSetEXT                   indirectExecutionSet,
    uint32_t                                    executionSetWriteCount,
    const VkWriteIndirectExecutionSetPipelineEXT* pExecutionSetWrites)
{
   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);
   assert(!iset->is_shaders);

   const VkWriteIndirectExecutionSetPipelineEXT *write = pExecutionSetWrites;
   for (uint32_t remaining = executionSetWriteCount; remaining > 0; remaining--, write++)
      iset->array[write->index] = write->pipeline;
}
/**
 * Store shader objects into a shader-type execution set.
 *
 * Each write replaces the slot at its `index` with its `shader` handle.
 * Indices are not range-checked here.
 */
VKAPI_ATTR void VKAPI_CALL lvp_UpdateIndirectExecutionSetShaderEXT(
    VkDevice                                    device,
    VkIndirectExecutionSetEXT                   indirectExecutionSet,
    uint32_t                                    executionSetWriteCount,
    const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites)
{
   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, indirectExecutionSet);
   assert(iset->is_shaders);

   const VkWriteIndirectExecutionSetShaderEXT *write = pExecutionSetWrites;
   for (uint32_t remaining = executionSetWriteCount; remaining > 0; remaining--, write++)
      iset->array[write->index] = write->shader;
}
/**
 * Size in bytes of the side structure a layout token points to through its
 * `data` union, or 0 for token types that carry no such structure.
 */
static size_t
get_token_info_size(VkIndirectCommandsTokenTypeEXT type)
{
   size_t info_size = 0;

   switch (type) {
   /* draw/dispatch/trace tokens carry no extra info struct */
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
      return info_size;

   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
      info_size = sizeof(VkIndirectCommandsVertexBufferTokenEXT);
      return info_size;

   /* sequence-index tokens share the push constant info layout */
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
      info_size = sizeof(VkIndirectCommandsPushConstantTokenEXT);
      return info_size;

   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
      info_size = sizeof(VkIndirectCommandsIndexBufferTokenEXT);
      return info_size;

   case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
      info_size = sizeof(VkIndirectCommandsExecutionSetTokenEXT);
      return info_size;

   default:
      break;
   }

   unreachable("unknown token type");
}
/**
 * Create an EXT indirect commands layout.
 *
 * The layout object is allocated as one chunk:
 *   [struct lvp_indirect_command_layout_ext][token array][per-token info structs]
 * The tokens are copied verbatim, then each token's `data` pointer is
 * redirected to a private copy of its info struct stored after the array, so
 * the layout does not keep pointers into the caller's create info.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails, VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateIndirectCommandsLayoutEXT(
    VkDevice                                    _device,
    const VkIndirectCommandsLayoutCreateInfoEXT* pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkIndirectCommandsLayoutEXT*                pIndirectCommandsLayout)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   const uint32_t token_count = pCreateInfo->tokenCount;

   /* bytes needed behind the struct: the token array plus every info struct */
   const size_t token_array_bytes = token_count * sizeof(VkIndirectCommandsLayoutTokenEXT);
   size_t trailing_bytes = token_array_bytes;
   for (uint32_t t = 0; t < token_count; t++)
      trailing_bytes += get_token_info_size(pCreateInfo->pTokens[t].type);

   struct lvp_indirect_command_layout_ext *elayout =
      vk_indirect_command_layout_create(&device->vk, pCreateInfo, pAllocator,
                                        sizeof(struct lvp_indirect_command_layout_ext) + trailing_bytes);
   if (elayout == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* classify the layout by its action token; the last one seen wins */
   enum lvp_indirect_layout_type layout_type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW;
   for (uint32_t t = 0; t < token_count; t++) {
      switch (pCreateInfo->pTokens[t].type) {
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
         layout_type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
         layout_type = LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
         layout_type = LVP_INDIRECT_COMMAND_LAYOUT_DISPATCH;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
         layout_type = LVP_INDIRECT_COMMAND_LAYOUT_RAYS;
         break;
      default:
         break;
      }
   }
   elayout->type = layout_type;

   /* tokens are the last member of the struct */
   typed_memcpy(elayout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);

   /* after the tokens comes the token data */
   uint8_t *info_dst = ((uint8_t *)elayout) + sizeof(struct lvp_indirect_command_layout_ext) + token_array_bytes;
   for (uint32_t t = 0; t < token_count; t++) {
      const VkIndirectCommandsLayoutTokenEXT *src = &pCreateInfo->pTokens[t];
      const size_t info_size = get_token_info_size(src->type);
      if (info_size != 0) {
         /* every member of the data union is a pointer, so pPushConstant
          * serves as the generic accessor here
          */
         elayout->tokens[t].data.pPushConstant = (void*)info_dst;
         memcpy(info_dst, src->data.pPushConstant, info_size);
      }
      info_dst += info_size;
   }

   *pIndirectCommandsLayout = lvp_indirect_command_layout_ext_to_handle(elayout);
   return VK_SUCCESS;
}
/**
 * Destroy an EXT indirect commands layout. A null handle is ignored.
 */
VKAPI_ATTR void VKAPI_CALL lvp_DestroyIndirectCommandsLayoutEXT(
    VkDevice                                    _device,
    VkIndirectCommandsLayoutEXT                 indirectCommandsLayout,
    const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, indirectCommandsLayout);

   if (elayout != NULL)
      vk_indirect_command_layout_destroy(&device->vk, pAllocator, &elayout->vk);
}
enum vk_cmd_type
lvp_ext_dgc_token_to_cmd_type(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token)
{
switch (token->type) {
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
return VK_CMD_BIND_VERTEX_BUFFERS2;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
return VK_CMD_PUSH_CONSTANTS2_KHR;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
return VK_CMD_BIND_INDEX_BUFFER2_KHR;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
return elayout->vk.is_shaders ? VK_CMD_BIND_SHADERS_EXT : VK_CMD_BIND_PIPELINE;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
return VK_CMD_DRAW_INDEXED;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
return VK_CMD_DRAW;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
return VK_CMD_DRAW_INDEXED_INDIRECT;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
return VK_CMD_DRAW_INDIRECT;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
return VK_CMD_DISPATCH;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
return VK_CMD_TRACE_RAYS_KHR;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
unreachable("unsupported NV mesh");
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
return VK_CMD_DRAW_MESH_TASKS_EXT;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
return VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT;
default:
unreachable("unknown token type");
}
return UINT32_MAX;
}
size_t
lvp_ext_dgc_token_size(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token)
{
UNUSED struct vk_cmd_queue_entry *cmd;
enum vk_cmd_type type = lvp_ext_dgc_token_to_cmd_type(elayout, token);
size_t size = vk_cmd_queue_type_sizes[type];
if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT || token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT) {
size += sizeof(*cmd->u.push_constants2_khr.push_constants_info);
size += token->data.pPushConstant->updateRange.size;
return size;
}
if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT) {
/* special case: switch between pipelines/shaders */
/* CmdBindShaders has 2 dynamically sized arrays */
if (elayout->vk.is_shaders)
size += sizeof(int64_t) * util_bitcount(token->data.pExecutionSet->shaderStages) * 2;
return size;
}
if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT)
return size + sizeof(VkStridedDeviceAddressRegionKHR) * 4;
switch (token->type) {
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
size += sizeof(*cmd->u.bind_vertex_buffers.buffers);
size += sizeof(*cmd->u.bind_vertex_buffers.offsets);
size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
break;
default:
unreachable("unknown type!");
}
return size;
}
/**
 * Report the preprocess buffer requirements for an EXT DGC layout.
 *
 * The per-sequence cost is a list head plus the generated size of every
 * token; draw layouts additionally reserve (maxSequenceCount + 1) queue
 * entries. The sum is multiplied by maxSequenceCount and rounded up to the
 * reported 4-byte alignment.
 */
VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsEXT(
    VkDevice                                    device,
    const VkGeneratedCommandsMemoryRequirementsInfoEXT* pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, pInfo->indirectCommandsLayout);

   size_t per_sequence = sizeof(struct list_head);
   const VkIndirectCommandsLayoutTokenEXT *token = elayout->tokens;
   for (unsigned remaining = elayout->vk.token_count; remaining > 0; remaining--, token++)
      per_sequence += lvp_ext_dgc_token_size(elayout, token);

   const bool is_draw_layout = elayout->type == LVP_INDIRECT_COMMAND_LAYOUT_DRAW ||
                               elayout->type == LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT;
   if (is_draw_layout) {
      /* set/unset indirect draw offset */
      per_sequence += sizeof(struct vk_cmd_queue_entry) * (pInfo->maxSequenceCount + 1);
   }

   const size_t total = per_sequence * pInfo->maxSequenceCount;

   pMemoryRequirements->memoryRequirements.memoryTypeBits = 1;
   pMemoryRequirements->memoryRequirements.alignment = 4;
   pMemoryRequirements->memoryRequirements.size = align(total, pMemoryRequirements->memoryRequirements.alignment);
}
+271 -1
View File
@@ -50,6 +50,7 @@
#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_descriptor_update_template.h"
#include "vk_util.h"
#include "vk_enum_to_str.h"
#define VK_PROTOTYPES
#include <vulkan/vulkan.h>
@@ -4155,6 +4156,267 @@ handle_execute_generated_commands(struct vk_cmd_queue_entry *cmd, struct renderi
state->pctx->buffer_unmap(state->pctx, pmap);
}
/**
 * Translate one sequence of an EXT device-generated-commands stream into
 * vk_cmd_queue entries.
 *
 * For every token of the layout, a vk_cmd_queue_entry (plus any variable-size
 * payload directly behind it) is built at the current write position in
 * `pbuf`, filled from the sequence's input data, and appended to `list`.
 *
 * @param state         rendering state, used to resolve device addresses to buffers
 * @param iset          execution set indexed by EXECUTION_SET tokens
 * @param elayout       layout describing the tokens of a sequence
 * @param list          command list the generated entries are appended to
 * @param pbuf          output position for this sequence's commands
 * @param max_size      upper bound on the bytes that may be written at `pbuf`;
 *                      the process aborts instead of exceeding it
 * @param stream        base of the indirect input stream
 * @param seq           index of the sequence to process
 * @param maxDrawCount  clamp applied to the draw counts of *_COUNT tokens
 * @param print_cmds    log each processed token type to stderr
 * @return number of bytes written at `pbuf`
 */
static size_t
process_sequence_ext(struct rendering_state *state,
                     struct lvp_indirect_execution_set *iset, struct lvp_indirect_command_layout_ext *elayout,
                     struct list_head *list, uint8_t *pbuf, size_t max_size,
                     uint8_t *stream, uint32_t seq, uint32_t maxDrawCount,
                     bool print_cmds)
{
   size_t size = 0;

   assert(elayout->vk.token_count);
   for (uint32_t t = 0; t < elayout->vk.token_count; t++){
      const VkIndirectCommandsLayoutTokenEXT *token = &elayout->tokens[t];
      /* computed in size_t so large streams cannot truncate the offset */
      size_t offset = (size_t)elayout->vk.stride * seq + token->offset;
      void *input = stream + offset;

      /* make sure the whole command fits before writing any part of it */
      const size_t token_size = lvp_ext_dgc_token_size(elayout, token);
      if (max_size < size + token_size)
         abort();

      struct vk_cmd_queue_entry *cmd = (struct vk_cmd_queue_entry*)(pbuf + size);
      cmd->type = lvp_ext_dgc_token_to_cmd_type(elayout, token);
      size_t cmd_size = vk_cmd_queue_type_sizes[cmd->type];
      /* variable-size payload lives directly behind the fixed-size entry */
      uint8_t *cmdptr = (void*)(pbuf + size + cmd_size);

      if (print_cmds)
         fprintf(stderr, "DGC %s\n", vk_IndirectCommandsTokenTypeEXT_to_str(token->type));

      switch (token->type) {
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT: {
         uint32_t *data = input;
         const VkIndirectCommandsExecutionSetTokenEXT *info = token->data.pExecutionSet;
         if (info->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) {
            cmd->u.bind_pipeline.pipeline_bind_point = lvp_pipeline_types_from_shader_stages(info->shaderStages);
            cmd->u.bind_pipeline.pipeline = iset->array[*data];
            /* validate */
            lvp_pipeline_from_handle(cmd->u.bind_pipeline.pipeline);
            assert(cmd->u.bind_pipeline.pipeline && "cannot bind null pipeline!");
         } else {
            /* payload layout: stage array followed by shader handle array */
            unsigned count = util_bitcount(info->shaderStages);
            cmd->u.bind_shaders_ext.stage_count = count;
            cmd->u.bind_shaders_ext.stages = (void*)cmdptr;
            int stage_idx = 0;
            u_foreach_bit(stage, info->shaderStages) {
               cmd->u.bind_shaders_ext.stages[stage_idx] = BITFIELD_BIT(stage);
               assert(cmd->u.bind_shaders_ext.stages[stage_idx] && "cannot bind null shader stage!");
               stage_idx++;
            }
            cmd->u.bind_shaders_ext.shaders = (void*)(cmdptr + sizeof(int64_t) * count);
            /* the input holds one execution-set index per bound stage */
            for (unsigned i = 0; i < count; i++) {
               cmd->u.bind_shaders_ext.shaders[i] = iset->array[data[i]];
               if (cmd->u.bind_shaders_ext.shaders[i])
                  lvp_shader_from_handle(cmd->u.bind_shaders_ext.shaders[i]);
            }
         }
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT: {
         uint32_t *data = input;
         const VkIndirectCommandsPushConstantTokenEXT *info = token->data.pPushConstant;
         /* payload layout: VkPushConstantsInfoKHR followed by the values */
         cmd->u.push_constants2_khr.push_constants_info = (void*)cmdptr;
         VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
         pci->layout = elayout->vk.layout;
         pci->stageFlags = VK_SHADER_STAGE_ALL;
         pci->offset = info->updateRange.offset;
         pci->size = info->updateRange.size;
         pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR));
         if (token->type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT)
            memcpy((void*)pci->pValues, data, info->updateRange.size);
         else
            /* sequence-index tokens push the sequence number itself */
            memcpy((void*)pci->pValues, &seq, info->updateRange.size);
         break;
      }
/* these are the DXGI format values to avoid needing the full header */
#define DXGI_FORMAT_R32_UINT 42
#define DXGI_FORMAT_R16_UINT 57
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT: {
         const VkIndirectCommandsIndexBufferTokenEXT *info = token->data.pIndexBuffer;
         VkBindIndexBufferIndirectCommandEXT *data = input;
         cmd->u.bind_index_buffer2_khr.offset = 0;
         /* write the offset through the same union member as every other field */
         if (data->bufferAddress)
            cmd->u.bind_index_buffer2_khr.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_index_buffer2_khr.offset);
         else
            cmd->u.bind_index_buffer2_khr.buffer = VK_NULL_HANDLE;
         if (info->mode == VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT) {
            cmd->u.bind_index_buffer2_khr.index_type = data->indexType;
         } else {
            /* DXGI input mode: indexType holds a DXGI_FORMAT value */
            switch ((int)data->indexType) {
            case DXGI_FORMAT_R32_UINT:
               cmd->u.bind_index_buffer2_khr.index_type = VK_INDEX_TYPE_UINT32;
               break;
            case DXGI_FORMAT_R16_UINT:
               cmd->u.bind_index_buffer2_khr.index_type = VK_INDEX_TYPE_UINT16;
               break;
            default:
               unreachable("unknown DXGI index type!");
            }
         }
         cmd->u.bind_index_buffer2_khr.size = data->size;
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT: {
         VkBindVertexBufferIndirectCommandEXT *data = input;
         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets);
         cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
         if (max_size < size + cmd_size)
            abort();

         /* payload layout: buffers[1], offsets[1], sizes[1], strides[1] */
         cmd->u.bind_vertex_buffers2.first_binding = token->data.pVertexBuffer->vertexBindingUnit;
         cmd->u.bind_vertex_buffers2.binding_count = 1;

         cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
         uint32_t alloc_offset = sizeof(*cmd->u.bind_vertex_buffers2.buffers);

         cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + alloc_offset);
         alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.offsets);

         cmd->u.bind_vertex_buffers2.sizes = (void*)(cmdptr + alloc_offset);
         alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.sizes);

         cmd->u.bind_vertex_buffers2.offsets[0] = 0;
         cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
         cmd->u.bind_vertex_buffers2.sizes[0] = data->size;

         cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + alloc_offset);
         cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT: {
         VkDispatchIndirectCommand *data = input;
         memcpy(&cmd->u.dispatch, data, sizeof(VkDispatchIndirectCommand));
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT: {
         VkDrawIndexedIndirectCommand *data = input;
         memcpy(&cmd->u.draw_indexed, data, sizeof(VkDrawIndexedIndirectCommand));
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT: {
         VkDrawIndirectCommand *data = input;
         memcpy(&cmd->u.draw, data, sizeof(VkDrawIndirectCommand));
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT: {
         VkDrawIndirectCountIndirectCommandEXT *data = input;
         cmd->u.draw_indexed_indirect.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_indexed_indirect.offset);
         cmd->u.draw_indexed_indirect.draw_count = MIN2(data->commandCount, maxDrawCount);
         cmd->u.draw_indexed_indirect.stride = data->stride;
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT: {
         VkDrawIndirectCountIndirectCommandEXT *data = input;
         cmd->u.draw_indirect.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_indirect.offset);
         cmd->u.draw_indirect.draw_count = MIN2(data->commandCount, maxDrawCount);
         cmd->u.draw_indirect.stride = data->stride;
         break;
      }
      // only available if VK_EXT_mesh_shader is supported
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT: {
         VkDrawMeshTasksIndirectCommandEXT *data = input;
         /* copy exactly the mesh-tasks command; the count-command struct is
          * larger and would read past this token's input
          */
         memcpy(&cmd->u.draw_mesh_tasks_ext, data, sizeof(VkDrawMeshTasksIndirectCommandEXT));
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT: {
         VkDrawIndirectCountIndirectCommandEXT *data = input;
         cmd->u.draw_mesh_tasks_indirect_ext.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.draw_mesh_tasks_indirect_ext.offset);
         cmd->u.draw_mesh_tasks_indirect_ext.draw_count = MIN2(data->commandCount, maxDrawCount);
         cmd->u.draw_mesh_tasks_indirect_ext.stride = data->stride;
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT: {
         VkTraceRaysIndirectCommand2KHR *data = input;
         /* payload layout: four regions (raygen, miss, hit, callable) */
         VkStridedDeviceAddressRegionKHR *sbts = (void*)cmdptr;

         cmd->u.trace_rays_khr.raygen_shader_binding_table = &sbts[0];
         cmd->u.trace_rays_khr.raygen_shader_binding_table->deviceAddress = data->raygenShaderRecordAddress;
         cmd->u.trace_rays_khr.raygen_shader_binding_table->stride = data->raygenShaderRecordSize;
         cmd->u.trace_rays_khr.raygen_shader_binding_table->size = data->raygenShaderRecordSize;

         cmd->u.trace_rays_khr.miss_shader_binding_table = &sbts[1];
         cmd->u.trace_rays_khr.miss_shader_binding_table->deviceAddress = data->missShaderBindingTableAddress;
         cmd->u.trace_rays_khr.miss_shader_binding_table->stride = data->missShaderBindingTableStride;
         cmd->u.trace_rays_khr.miss_shader_binding_table->size = data->missShaderBindingTableSize;

         cmd->u.trace_rays_khr.hit_shader_binding_table = &sbts[2];
         cmd->u.trace_rays_khr.hit_shader_binding_table->deviceAddress = data->hitShaderBindingTableAddress;
         cmd->u.trace_rays_khr.hit_shader_binding_table->stride = data->hitShaderBindingTableStride;
         cmd->u.trace_rays_khr.hit_shader_binding_table->size = data->hitShaderBindingTableSize;

         cmd->u.trace_rays_khr.callable_shader_binding_table = &sbts[3];
         cmd->u.trace_rays_khr.callable_shader_binding_table->deviceAddress = data->callableShaderBindingTableAddress;
         cmd->u.trace_rays_khr.callable_shader_binding_table->stride = data->callableShaderBindingTableStride;
         cmd->u.trace_rays_khr.callable_shader_binding_table->size = data->callableShaderBindingTableSize;

         cmd->u.trace_rays_khr.width = data->width;
         cmd->u.trace_rays_khr.height = data->height;
         cmd->u.trace_rays_khr.depth = data->depth;
         break;
      }
      default:
         unreachable("unknown token type");
         break;
      }

      size += token_size;
      list_addtail(&cmd->cmd_link, list);
   }
   return size;
}
/**
 * Build the command list for an EXT generated-commands preprocess.
 *
 * The preprocess buffer starts with a struct list_head; the commands of each
 * sequence are generated back to back behind it and linked into that list.
 * The number of sequences is maxSequenceCount, further clamped by the value
 * at sequenceCountAddress when one is provided.
 */
static void
handle_preprocess_generated_commands_ext(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
{
   VkGeneratedCommandsInfoEXT *pre = cmd->u.preprocess_generated_commands_ext.generated_commands_info;
   VK_FROM_HANDLE(lvp_indirect_command_layout_ext, elayout, pre->indirectCommandsLayout);
   VK_FROM_HANDLE(lvp_indirect_execution_set, iset, pre->indirectExecutionSet);

   unsigned seq_count = pre->maxSequenceCount;
   if (pre->sequenceCountAddress) {
      uint32_t *count = (void*)(uintptr_t)pre->sequenceCountAddress;
      seq_count = MIN2(seq_count, *count);
   }

   struct list_head *list = (void*)(uintptr_t)pre->preprocessAddress;
   size_t size = sizeof(struct list_head);
   size_t max_size = pre->preprocessSize;
   /* the buffer must at least hold the list head */
   if (size > max_size)
      abort();
   list_inithead(list);

   size_t offset = size;
   uint8_t *p = (void*)(uintptr_t)pre->preprocessAddress;
   for (unsigned i = 0; i < seq_count; i++) {
      /* Hand each sequence only the space that is still free behind `offset`,
       * so its bounds check accounts for what earlier sequences consumed.
       * process_sequence_ext() never returns more than it was given, which
       * keeps offset <= max_size and the subtraction from wrapping.
       */
      offset += process_sequence_ext(state, iset, elayout, list, p + offset, max_size - offset, (void*)(uintptr_t)pre->indirectAddress, i, pre->maxDrawCount, print_cmds);
      assert(offset);
   }

   /* vk_cmd_queue will copy the binary and break the list, so null the tail pointer */
   list->prev->next = NULL;
}
/**
 * Execute EXT generated commands.
 *
 * If the application did not preprocess explicitly, the preprocess step runs
 * first. The command list stored at the start of the preprocess buffer is
 * then executed, unless its first entry resolves to NULL.
 */
static void
handle_execute_generated_commands_ext(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
{
   struct vk_cmd_execute_generated_commands_ext *exec = &cmd->u.execute_generated_commands_ext;
   VkGeneratedCommandsInfoEXT *info = exec->generated_commands_info;

   if (!exec->is_preprocessed) {
      /* only the generated_commands_info field is consumed by the preprocess handler */
      struct vk_cmd_queue_entry pre;
      pre.u.preprocess_generated_commands_ext.generated_commands_info = info;
      handle_preprocess_generated_commands_ext(&pre, state, print_cmds);
   }

   /* the list head lives at the very start of the preprocess buffer */
   uint8_t *preprocess_base = (void*)(uintptr_t)info->preprocessAddress;
   struct list_head *generated = (void*)preprocess_base;

   struct vk_cmd_queue_entry *first_cmd = list_first_entry(generated, struct vk_cmd_queue_entry, cmd_link);
   if (first_cmd != NULL)
      lvp_execute_cmd_buffer(generated, state, print_cmds);
}
static void
handle_descriptor_buffers(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
{
@@ -4788,6 +5050,8 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
ENQUEUE_CMD(CmdBindPipelineShaderGroupNV)
ENQUEUE_CMD(CmdPreprocessGeneratedCommandsNV)
ENQUEUE_CMD(CmdExecuteGeneratedCommandsNV)
ENQUEUE_CMD(CmdPreprocessGeneratedCommandsEXT)
ENQUEUE_CMD(CmdExecuteGeneratedCommandsEXT)
#ifdef VK_ENABLE_BETA_EXTENSIONS
ENQUEUE_CMD(CmdInitializeGraphScratchMemoryAMDX)
@@ -4823,7 +5087,7 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
LIST_FOR_EACH_ENTRY(cmd, cmds, cmd_link) {
if (print_cmds)
fprintf(stderr, "%s\n", vk_cmd_queue_type_names[cmd->type]);
switch (cmd->type) {
switch ((unsigned)cmd->type) {
case VK_CMD_BIND_PIPELINE:
handle_pipeline(cmd, state);
break;
@@ -5151,6 +5415,12 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds,
case VK_CMD_EXECUTE_GENERATED_COMMANDS_NV:
handle_execute_generated_commands(cmd, state, print_cmds);
break;
case VK_CMD_PREPROCESS_GENERATED_COMMANDS_EXT:
handle_preprocess_generated_commands_ext(cmd, state, print_cmds);
break;
case VK_CMD_EXECUTE_GENERATED_COMMANDS_EXT:
handle_execute_generated_commands_ext(cmd, state, print_cmds);
break;
case VK_CMD_BIND_DESCRIPTOR_BUFFERS_EXT:
handle_descriptor_buffers(cmd, state);
break;
@@ -69,6 +69,7 @@ typedef uint32_t xcb_window_t;
#include "vk_buffer.h"
#include "vk_buffer_view.h"
#include "vk_device.h"
#include "vk_device_generated_commands.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
@@ -638,6 +639,29 @@ struct lvp_indirect_command_layout_nv {
VkIndirectCommandsLayoutTokenNV tokens[0];
};
/* Driver object behind VkIndirectExecutionSetEXT: a table of pipeline or
 * shader object handles that generated commands select by index.
 */
struct lvp_indirect_execution_set {
struct vk_object_base base;
/* true if the set was created for shader objects, false for pipelines */
bool is_shaders;
/* Trailing handle slots, allocated together with the struct.
 * NOTE(review): the element type presumably mirrors how Vulkan defines
 * non-dispatchable handles for the target (pointer vs. uint64_t) -- confirm.
 */
#if VK_USE_64_BIT_PTR_DEFINES
void *array[0];
#else
uint64_t array[0];
#endif
};
/* Kind of action command an EXT indirect commands layout ends in, derived
 * from the layout's tokens at creation time.
 */
enum lvp_indirect_layout_type {
/* direct draw tokens (draw, indexed draw, mesh tasks) */
LVP_INDIRECT_COMMAND_LAYOUT_DRAW,
/* draw tokens that take a draw count (*_COUNT token types) */
LVP_INDIRECT_COMMAND_LAYOUT_DRAW_COUNT,
/* compute dispatch token */
LVP_INDIRECT_COMMAND_LAYOUT_DISPATCH,
/* trace rays token */
LVP_INDIRECT_COMMAND_LAYOUT_RAYS,
};
/* Driver object behind VkIndirectCommandsLayoutEXT. */
struct lvp_indirect_command_layout_ext {
/* common runtime layout state (token count, stride, pipeline layout, ...) */
struct vk_indirect_command_layout vk;
/* action command category of this layout */
enum lvp_indirect_layout_type type;
/* Copy of the create-info tokens. The per-token info structs are stored
 * behind this array in the same allocation and the tokens' data pointers
 * refer to those copies.
 */
VkIndirectCommandsLayoutTokenEXT tokens[0];
};
extern const struct vk_command_buffer_ops lvp_cmd_buffer_ops;
static inline const struct lvp_descriptor_set_layout *
@@ -695,6 +719,10 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_sampler, vk.base, VkSampler,
VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout_nv, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_command_layout_ext, vk.base, VkIndirectCommandsLayoutEXT,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT)
VK_DEFINE_NONDISP_HANDLE_CASTS(lvp_indirect_execution_set, base, VkIndirectExecutionSetEXT,
VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT)
void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp);
@@ -795,6 +823,10 @@ lvp_create_ahb_memory(struct lvp_device *device, struct lvp_device_memory *mem,
const VkMemoryAllocateInfo *pAllocateInfo);
#endif
/* Map an EXT DGC layout token to the vk_cmd_queue command type generated for it. */
enum vk_cmd_type
lvp_ext_dgc_token_to_cmd_type(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token);
/* Bytes one generated command for `token` occupies in the preprocess buffer,
 * including any variable-size payload stored behind the queue entry.
 */
size_t
lvp_ext_dgc_token_size(const struct lvp_indirect_command_layout_ext *elayout, const VkIndirectCommandsLayoutTokenEXT *token);
#ifdef __cplusplus
}
#endif