anv: use a single generation shader for indirect draws

The indirect draw count shader can be used as a more generic case of
the indirect draw one. We'll never enter the last condition of the
shader (writing the MI_BATCH_BUFFER_START) with the non-count variants
of draws.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20497>
This commit is contained in:
Lionel Landwerlin
2023-01-02 02:01:48 +02:00
committed by Marge Bot
parent 897a92f576
commit caf2389bc5
9 changed files with 118 additions and 409 deletions
@@ -32,7 +32,6 @@
#include "anv_generated_indirect_draws.h"
#include "shaders/generated_draws_spv.h"
#include "shaders/generated_draws_count_spv.h"
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
* 64bit addresses. Binding 2 is left UBO that would normally be accessed
@@ -286,8 +285,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
char name[40];
} indirect_draws_key = {
.name = "anv-generated-indirect-draws",
}, indirect_draws_count_key = {
.name = "anv-generated-indirect-draws-count",
};
device->generated_draw_kernel =
@@ -303,7 +300,10 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
sizeof(indirect_draws_key),
generated_draws_spv_source,
ARRAY_SIZE(generated_draws_spv_source),
10 /* 2 * (2 loads + 3 stores) */);
11 /*
* 2 * (2 indirect data loads + 3 3DPRIMITIVE stores) +
* 1 store (MI_BATCH_BUFFER_START)
*/);
}
if (device->generated_draw_kernel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -313,29 +313,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
*/
anv_shader_bin_unref(device, device->generated_draw_kernel);
device->generated_draw_count_kernel =
anv_device_search_for_kernel(device,
device->internal_cache,
&indirect_draws_count_key,
sizeof(indirect_draws_count_key),
NULL);
if (device->generated_draw_count_kernel == NULL) {
device->generated_draw_count_kernel =
compile_upload_spirv(device,
&indirect_draws_count_key,
sizeof(indirect_draws_count_key),
generated_draws_count_spv_source,
ARRAY_SIZE(generated_draws_count_spv_source),
11 /* 2 * (3 loads + 3 stores) */);
}
if (device->generated_draw_count_kernel == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The cache already has a reference and it's not going anywhere so there
* is no need to hold a second reference.
*/
anv_shader_bin_unref(device, device->generated_draw_count_kernel);
return VK_SUCCESS;
}
@@ -26,29 +26,19 @@
#include <stdint.h>
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* This needs to match generated_draws.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct anv_generated_indirect_draw_params {
/* Set from the caller's "indexed" flag by the host code */
uint32_t is_indexed;
/* Set from cmd_buffer->state.conditional_render_enabled by the host code */
uint32_t is_predicated;
/* Index of the first draw handled by this generation pass (item_base) */
uint32_t draw_base;
/* Filled with item_count by the host code */
uint32_t draw_count;
/* Copied from the bound graphics pipeline's instance_multiplier */
uint32_t instance_multiplier;
/* Stride between consecutive entries of the indirect data */
uint32_t indirect_data_stride;
};
/* This needs to match generated_draws_count.glsl :
*
* layout(set = 0, binding = 2) uniform block
*/
struct anv_generated_indirect_draw_count_params {
uint32_t is_indexed;
uint32_t is_predicated;
uint32_t flags;
uint32_t draw_base;
uint32_t item_count;
uint32_t draw_count;
uint32_t max_draw_count;
uint32_t instance_multiplier;
uint32_t indirect_data_stride;
uint32_t end_addr_ldw;
@@ -56,10 +46,7 @@ struct anv_generated_indirect_draw_count_params {
};
struct anv_generate_indirect_params {
union {
struct anv_generated_indirect_draw_params draw;
struct anv_generated_indirect_draw_count_params draw_count;
};
struct anv_generated_indirect_draw_params draw;
/* Global address of binding 0 */
uint64_t indirect_data_addr;
-1
View File
@@ -1230,7 +1230,6 @@ struct anv_device {
* workaround slowness with indirect draw calls.
*/
struct anv_shader_bin *generated_draw_kernel;
struct anv_shader_bin *generated_draw_count_kernel;
const struct intel_l3_config *generated_draw_l3_config;
pthread_mutex_t mutex;
+4 -2
View File
@@ -4727,6 +4727,7 @@ void genX(CmdDrawIndirect)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndirectCommand)),
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
false /* indexed */);
} else {
@@ -4768,6 +4769,7 @@ void genX(CmdDrawIndexedIndirect)(
cmd_buffer,
anv_address_add(buffer->address, offset),
MAX2(stride, sizeof(VkDrawIndexedIndirectCommand)),
ANV_NULL_ADDRESS /* count_addr */,
drawCount,
true /* indexed */);
} else {
@@ -4966,7 +4968,7 @@ void genX(CmdDrawIndirectCount)(
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws_count)(
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_address,
stride,
@@ -5023,7 +5025,7 @@ void genX(CmdDrawIndexedIndirectCount)(
#if GFX_HAS_GENERATED_CMDS
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
genX(cmd_buffer_emit_indirect_generated_draws_count)(
genX(cmd_buffer_emit_indirect_generated_draws)(
cmd_buffer,
indirect_data_address,
stride,
@@ -176,6 +176,34 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit(batch, GENX(3DSTATE_WM), wm);
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, device->info, prog_data,
1 /* rasterization_samples */,
0 /* msaa_flags */);
ps.VectorMaskEnable = prog_data->uses_vmask;
ps.BindingTableEntryCount = 0;
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
psx.PixelShaderValid = true;
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
@@ -272,16 +300,7 @@ genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer,
});
}
static struct anv_state
genX(cmd_buffer_alloc_generated_push_data)(struct anv_cmd_buffer *cmd_buffer)
{
return anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
sizeof(struct anv_generate_indirect_params),
ANV_UBO_ALIGNMENT);
}
static struct anv_state
static void
genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
struct anv_state push_data_state)
{
@@ -313,67 +332,45 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
c.ConstantBody.Buffer[0] = push_data_addr;
}
#endif
return push_data_state;
}
static struct anv_generate_indirect_params *
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address generated_cmds_addr,
uint32_t draw_cmd_stride,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t item_base,
uint32_t item_count,
struct anv_address count_addr,
uint32_t max_count,
bool indexed)
{
struct anv_device *device = cmd_buffer->device;
struct anv_batch *batch = &cmd_buffer->generation_batch;
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data_const(draw_kernel->prog_data);
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, device->info, prog_data,
1 /* rasterization_samples */,
0 /* msaa_flags */);
ps.VectorMaskEnable = prog_data->uses_vmask;
ps.BindingTableEntryCount = 0;
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
struct anv_state push_data_state =
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
sizeof(struct anv_generate_indirect_params),
ANV_UBO_ALIGNMENT);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
struct anv_generate_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generate_indirect_params) {
.draw = {
.is_indexed = indexed,
.is_predicated = cmd_buffer->state.conditional_render_enabled,
.flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
(cmd_buffer->state.conditional_render_enabled ?
ANV_GENERATED_FLAG_PREDICATED : 0) |
((draw_cmd_stride / 4) << 16),
.draw_base = item_base,
.draw_count = item_count,
.item_count = item_count,
/* If count_addr is not NULL, we'll edit it through the command
* streamer.
*/
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.max_draw_count = max_count,
.instance_multiplier = pipeline->instance_multiplier,
.indirect_data_stride = indirect_data_stride,
},
@@ -381,6 +378,29 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
};
if (!anv_address_is_null(count_addr)) {
/* Copy the draw count into the push constants so that the generation
* gets the value straight away and doesn't even need to access memory.
*/
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, batch);
mi_memcpy(&b,
anv_address_add((struct anv_address) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = push_data_state.offset,
},
offsetof(struct anv_generate_indirect_params, draw.draw_count)),
count_addr, 4);
/* Make sure the memcpy landed for the generating draw call to pick up
* the value.
*/
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;
}
}
/* Only emit the data after the memcpy above. */
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
@@ -430,8 +450,8 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
uint64_t end_addr =
anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
while (params != NULL) {
params->draw_count.end_addr_ldw = end_addr & 0xffffffff;
params->draw_count.end_addr_udw = end_addr >> 32;
params->draw.end_addr_ldw = end_addr & 0xffffffff;
params->draw.end_addr_udw = end_addr >> 32;
params = params->prev;
}
}
@@ -440,7 +460,8 @@ static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t draw_count,
struct anv_address count_addr,
uint32_t max_draw_count,
bool indexed)
{
genX(flush_pipeline_select_3d)(cmd_buffer);
@@ -473,179 +494,6 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
struct anv_generate_indirect_params *last_params = NULL;
uint32_t item_base = 0;
while (item_base < draw_count) {
const uint32_t item_count = MIN2(draw_count - item_base,
MAX_GENERATED_DRAW_COUNT);
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
/* Ensure we have enough contiguous space for all the draws so that the
* compute shader can edit all the 3DPRIMITIVEs from a single base
* address.
*
* TODO: we might have to split that if the amount of space is too large (at
* 1Mb?).
*/
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
draw_cmd_size);
if (result != VK_SUCCESS)
return;
struct anv_generate_indirect_params *params =
genX(cmd_buffer_emit_generate_draws)(
cmd_buffer,
anv_batch_current_address(&cmd_buffer->batch),
indirect_data_addr,
indirect_data_stride,
item_base,
item_count,
indexed);
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
item_base += item_count;
params->prev = last_params;
last_params = params;
}
}
/* Emit, into the command buffer's generation batch, one pass of the
 * "draw count" generation shader.
 *
 * The pass programs 3DSTATE_PS with the device's generated_draw_count_kernel,
 * sets up the vertex data for item_count items, fills a freshly allocated
 * push constant block, copies the application's draw count from count_addr
 * into that block with the command streamer, and finally emits the RECTLIST
 * 3DPRIMITIVE that runs the shader.
 *
 * cmd_buffer:            command buffer whose generation_batch is written
 * generated_cmds_addr:   address stored in the push data as the destination
 *                        of the generated commands
 * indirect_data_addr:    address stored in the push data as the source of the
 *                        application's indirect draw data
 * indirect_data_stride:  stride of the indirect data, forwarded to the shader
 * item_base:             first draw index covered by this pass
 * item_count:            number of items covered by this pass
 * count_addr:            address of the 32bit draw count to copy in
 * indexed:               forwarded to the shader as is_indexed
 *
 * Returns the CPU mapping of the push data so the caller can keep editing it
 * after this call.
 */
static struct anv_generate_indirect_params *
genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address generated_cmds_addr,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
uint32_t item_base,
uint32_t item_count,
struct anv_address count_addr,
bool indexed)
{
struct anv_device *device = cmd_buffer->device;
/* All commands below go to the generation batch, not the main batch. */
struct anv_batch *batch = &cmd_buffer->generation_batch;
const struct anv_shader_bin *draw_kernel =
device->generated_draw_count_kernel;
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data_const(draw_kernel->prog_data);
/* Bind the generation kernel as the pixel shader. */
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
ps.BindingTableEntryCount = 2;
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
ps._32PixelDispatchEnable = prog_data->dispatch_32;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
struct anv_state push_data_state =
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
/* CPU-side initialisation of the push constants. */
struct anv_generate_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generate_indirect_params) {
.draw_count = {
.is_indexed = indexed,
.is_predicated = cmd_buffer->state.conditional_render_enabled,
.draw_base = item_base,
.item_count = item_count,
.draw_count = 0, /* Overwritten through the command streamer below */
.instance_multiplier = pipeline->instance_multiplier,
.indirect_data_stride = indirect_data_stride,
},
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
};
/* Copy the draw count into the push constants so that the generation gets
* the value straight away and doesn't even need to access memory.
*/
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, batch);
/* The destination is the draw_count field of the push data, addressed
* through the dynamic state pool BO; 4 bytes are copied from count_addr.
*/
mi_memcpy(&b,
anv_address_add((struct anv_address) {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = push_data_state.offset,
},
offsetof(struct anv_generate_indirect_params, draw_count.draw_count)),
count_addr, 4);
/* Make sure the memcpy landed for the generating draw call to pick up the
* value.
*/
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;
}
/* Only emit the data after the memcpy above. */
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
/* Kick the generation shader with a 3 vertex, single instance RECTLIST. */
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
prim.VertexCountPerInstance = 3;
prim.InstanceCount = 1;
}
return push_data;
}
static void
genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
struct anv_address count_addr,
uint32_t max_draw_count,
bool indexed)
{
genX(flush_pipeline_select_3d)(cmd_buffer);
/* Apply the pipeline flush here so the indirect data is available for the
* generation shader.
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (anv_address_is_null(cmd_buffer->generation_return_addr))
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
/* In order to have the vertex fetch gather the data we need to have a non
* 0 stride. It's possible to have a 0 stride given by the application when
* draw_count is 1, but we need a correct value for the
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
* correctly :
*
* Vulkan spec, vkCmdDrawIndirect:
*
* "If drawCount is less than or equal to one, stride is ignored."
*/
assert(indirect_data_stride > 0);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
/* Emit the 3D state in the main batch. */
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
struct anv_generate_indirect_params *last_params = NULL;
uint32_t item_base = 0;
while (item_base < max_draw_count) {
@@ -666,15 +514,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_
return;
struct anv_generate_indirect_params *params =
genX(cmd_buffer_emit_generate_draws_count)(
genX(cmd_buffer_emit_generate_draws)(
cmd_buffer,
anv_batch_current_address(&cmd_buffer->batch),
anv_address_add(indirect_data_addr,
item_base * indirect_data_stride),
draw_cmd_stride,
indirect_data_addr,
indirect_data_stride,
item_base,
item_count,
count_addr,
max_draw_count,
indexed);
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
+2 -3
View File
@@ -114,8 +114,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
_gfx_ver = g[0]
libanv_per_hw_ver_libs += static_library(
'anv_per_hw_ver@0@'.format(_gfx_ver),
[anv_per_hw_ver_files, g[1], anv_entrypoints[0],
generated_draws_spv_h, generated_draws_count_spv_h],
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spv_h, ],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
],
@@ -206,7 +205,7 @@ libanv_common = static_library(
[
libanv_files, anv_entrypoints, sha1_h,
gen_xml_pack, float64_spv_h,
generated_draws_spv_h, generated_draws_count_spv_h
generated_draws_spv_h,
],
include_directories : [
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
+26 -6
View File
@@ -23,6 +23,11 @@
#version 450
#define BITFIELD_BIT(i) (1u << (i))
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
@@ -34,24 +39,29 @@ layout(set = 0, binding = 1, std430) buffer Storage1 {
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
uint is_indexed;
uint is_predicated;
uint flags;
uint draw_base;
uint item_count;
uint draw_count;
uint max_draw_count;
uint instance_multiplier;
uint indirect_data_stride;
uint end_addr_ldw;
uint end_addr_udw;
};
void main()
{
bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
uint _3dprim_dw_size = 10;
uint cmd_idx = uint(item_idx) * _3dprim_dw_size;
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = draw_base + item_idx;
if (draw_id < draw_count) {
if (is_indexed != 0) {
if (is_indexed) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
@@ -63,7 +73,7 @@ void main()
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
uint(is_predicated) << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
@@ -85,7 +95,7 @@ void main()
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
uint(is_predicated) << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 0;
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
@@ -97,5 +107,15 @@ void main()
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
}
} else if (draw_id == draw_count && draw_id < max_draw_count) {
/* Only write a jump forward in the batch if we have fewer elements than
* the max draw count.
*/
commands[cmd_idx + 0] = (0 << 29 | /* Command Type */
49 << 23 | /* MI Command Opcode */
1 << 8 | /* Address Space Indicator (PPGTT) */
1 << 0); /* DWord Length */
commands[cmd_idx + 1] = end_addr_ldw;
commands[cmd_idx + 2] = end_addr_udw;
}
}
@@ -1,111 +0,0 @@
/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 450
/* These 2 bindings will be accessed through A64 messages */
layout(set = 0, binding = 0, std430) buffer Storage0 {
uint indirect_data[];
};
layout(set = 0, binding = 1, std430) buffer Storage1 {
uint commands[];
};
/* This data will be provided through push constants. */
layout(set = 0, binding = 2) uniform block {
/* Non-zero selects the VkDrawIndexedIndirectCommand path in main() */
uint is_indexed;
/* Shifted into bit 8 of the first dword of each generated 3DPRIMITIVE */
uint is_predicated;
/* Added to the per-fragment item index to form the draw id */
uint draw_base;
/* Set by the host; not read by main() */
uint item_count;
/* Draws with an id below this value are generated */
uint draw_count;
/* Multiplied into the instance count read from the indirect data */
uint instance_multiplier;
/* Stride between indirect entries; divided by 4 to index indirect_data[] */
uint indirect_data_stride;
/* Low and high dwords written after the jump command header */
uint end_addr_ldw;
uint end_addr_udw;
};
/* Each fragment handles one item: it either writes a 10 dword 3DPRIMITIVE
 * built from the matching indirect data entry (draw_id < draw_count), or, for
 * the single fragment whose draw_id equals draw_count, writes a 3 dword MI
 * command carrying end_addr_ldw/end_addr_udw. Fragments with a larger draw_id
 * write nothing.
 *
 * NOTE(review): the host passes a fixed count ("2 * (3 loads + 3 stores)")
 * when compiling this shader; presumably the number of memory accesses here
 * has to stay in sync with that value -- confirm before restructuring.
 */
void main()
{
/* Linear item index, using a row width of 8192 fragments. */
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
/* indirect_data[] is an array of uints, hence the division by 4. */
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
/* Every item owns a 10 dword slot in commands[]. */
uint _3dprim_dw_size = 10;
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = draw_base + item_idx;
if (draw_id < draw_count) {
if (is_indexed != 0) {
/* Loading a VkDrawIndexedIndirectCommand */
uint index_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_index = indirect_data[indirect_data_offset + 2];
uint vertex_offset = indirect_data[indirect_data_offset + 3];
uint first_instance = indirect_data[indirect_data_offset + 4];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_index; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = vertex_offset; /* Base Vertex Location */
commands[cmd_idx + 7] = vertex_offset; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
} else {
/* Loading a VkDrawIndirectCommand structure */
uint vertex_count = indirect_data[indirect_data_offset + 0];
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
uint first_vertex = indirect_data[indirect_data_offset + 2];
uint first_instance = indirect_data[indirect_data_offset + 3];
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
3 << 27 | /* Command SubType */
3 << 24 | /* 3D Command Opcode */
1 << 11 | /* Extended Parameter Enable */
is_predicated << 8 |
8 << 0); /* DWord Length */
commands[cmd_idx + 1] = 0;
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
commands[cmd_idx + 3] = first_vertex; /* Start Vertex Location */
commands[cmd_idx + 4] = instance_count; /* Instance Count */
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
commands[cmd_idx + 6] = 0; /* Base Vertex Location */
commands[cmd_idx + 7] = first_vertex; /* gl_BaseVertex */
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
}
} else if (draw_id == draw_count) {
/* First slot past the last generated draw: write the command that
 * carries the end address supplied by the host.
 */
commands[cmd_idx + 0] = (0 << 29 | /* Command Type */
49 << 23 | /* MI Command Opcode */
1 << 8 | /* Address Space Indicator (PPGTT) */
1 << 0); /* DWord Length */
commands[cmd_idx + 1] = end_addr_ldw;
commands[cmd_idx + 2] = end_addr_udw;
}
}
-13
View File
@@ -44,16 +44,3 @@ generated_draws_spv_h = custom_target(
'--stage', 'frag',
]
)
# Generate generated_draws_count_spv.h from generated_draws_count.glsl by
# running the glsl2spirv script through Python with glslang, as a GLSL 450
# fragment-stage shader. The host code reads the result through the
# generated_draws_count_spv_source symbol named with --vn.
generated_draws_count_spv_h = custom_target(
'generated_draws_count_spv.h',
input : [glsl2spirv, 'generated_draws_count.glsl'],
output : 'generated_draws_count_spv.h',
command : [
prog_python, '@INPUT@', '@OUTPUT@',
prog_glslang,
'--vn', 'generated_draws_count_spv_source',
'--glsl-version', '450',
'--stage', 'frag',
]
)