anv: use a single generation shader for indirect draws
The indirect draw count shader can be used as a more generic case of the indirect draw one. We'll never enter the last condition of the shader (writing the MI_BATCH_BUFFER_START) with the non-count variants of the draws. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20497>
This commit is contained in:
committed by
Marge Bot
parent
897a92f576
commit
caf2389bc5
@@ -32,7 +32,6 @@
|
||||
#include "anv_generated_indirect_draws.h"
|
||||
|
||||
#include "shaders/generated_draws_spv.h"
|
||||
#include "shaders/generated_draws_count_spv.h"
|
||||
|
||||
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
|
||||
* 64bit addresses. Binding 2 is left as a UBO that would normally be accessed
|
||||
@@ -286,8 +285,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||
char name[40];
|
||||
} indirect_draws_key = {
|
||||
.name = "anv-generated-indirect-draws",
|
||||
}, indirect_draws_count_key = {
|
||||
.name = "anv-generated-indirect-draws-count",
|
||||
};
|
||||
|
||||
device->generated_draw_kernel =
|
||||
@@ -303,7 +300,10 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||
sizeof(indirect_draws_key),
|
||||
generated_draws_spv_source,
|
||||
ARRAY_SIZE(generated_draws_spv_source),
|
||||
10 /* 2 * (2 loads + 3 stores) */);
|
||||
11 /*
|
||||
* 2 * (2 indirect data loads + 3 3DPRIMITIVE stores) +
|
||||
* 1 store (MI_BATCH_BUFFER_START)
|
||||
*/);
|
||||
}
|
||||
if (device->generated_draw_kernel == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
@@ -313,29 +313,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||
*/
|
||||
anv_shader_bin_unref(device, device->generated_draw_kernel);
|
||||
|
||||
device->generated_draw_count_kernel =
|
||||
anv_device_search_for_kernel(device,
|
||||
device->internal_cache,
|
||||
&indirect_draws_count_key,
|
||||
sizeof(indirect_draws_count_key),
|
||||
NULL);
|
||||
if (device->generated_draw_count_kernel == NULL) {
|
||||
device->generated_draw_count_kernel =
|
||||
compile_upload_spirv(device,
|
||||
&indirect_draws_count_key,
|
||||
sizeof(indirect_draws_count_key),
|
||||
generated_draws_count_spv_source,
|
||||
ARRAY_SIZE(generated_draws_count_spv_source),
|
||||
11 /* 2 * (3 loads + 3 stores) */);
|
||||
}
|
||||
if (device->generated_draw_count_kernel == NULL)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* The cache already has a reference and it's not going anywhere so there
|
||||
* is no need to hold a second reference.
|
||||
*/
|
||||
anv_shader_bin_unref(device, device->generated_draw_count_kernel);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,29 +26,19 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
||||
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
||||
|
||||
/* This needs to match generated_draws.glsl :
|
||||
*
|
||||
* layout(set = 0, binding = 2) uniform block
|
||||
*/
|
||||
struct anv_generated_indirect_draw_params {
|
||||
uint32_t is_indexed;
|
||||
uint32_t is_predicated;
|
||||
uint32_t draw_base;
|
||||
uint32_t draw_count;
|
||||
uint32_t instance_multiplier;
|
||||
uint32_t indirect_data_stride;
|
||||
};
|
||||
|
||||
/* This needs to match generated_draws_count.glsl :
|
||||
*
|
||||
* layout(set = 0, binding = 2) uniform block
|
||||
*/
|
||||
struct anv_generated_indirect_draw_count_params {
|
||||
uint32_t is_indexed;
|
||||
uint32_t is_predicated;
|
||||
uint32_t flags;
|
||||
uint32_t draw_base;
|
||||
uint32_t item_count;
|
||||
uint32_t draw_count;
|
||||
uint32_t max_draw_count;
|
||||
uint32_t instance_multiplier;
|
||||
uint32_t indirect_data_stride;
|
||||
uint32_t end_addr_ldw;
|
||||
@@ -56,10 +46,7 @@ struct anv_generated_indirect_draw_count_params {
|
||||
};
|
||||
|
||||
struct anv_generate_indirect_params {
|
||||
union {
|
||||
struct anv_generated_indirect_draw_params draw;
|
||||
struct anv_generated_indirect_draw_count_params draw_count;
|
||||
};
|
||||
struct anv_generated_indirect_draw_params draw;
|
||||
|
||||
/* Global address of binding 0 */
|
||||
uint64_t indirect_data_addr;
|
||||
|
||||
@@ -1230,7 +1230,6 @@ struct anv_device {
|
||||
* workaround slowness with indirect draw calls.
|
||||
*/
|
||||
struct anv_shader_bin *generated_draw_kernel;
|
||||
struct anv_shader_bin *generated_draw_count_kernel;
|
||||
const struct intel_l3_config *generated_draw_l3_config;
|
||||
|
||||
pthread_mutex_t mutex;
|
||||
|
||||
@@ -4727,6 +4727,7 @@ void genX(CmdDrawIndirect)(
|
||||
cmd_buffer,
|
||||
anv_address_add(buffer->address, offset),
|
||||
MAX2(stride, sizeof(VkDrawIndirectCommand)),
|
||||
ANV_NULL_ADDRESS /* count_addr */,
|
||||
drawCount,
|
||||
false /* indexed */);
|
||||
} else {
|
||||
@@ -4768,6 +4769,7 @@ void genX(CmdDrawIndexedIndirect)(
|
||||
cmd_buffer,
|
||||
anv_address_add(buffer->address, offset),
|
||||
MAX2(stride, sizeof(VkDrawIndexedIndirectCommand)),
|
||||
ANV_NULL_ADDRESS /* count_addr */,
|
||||
drawCount,
|
||||
true /* indexed */);
|
||||
} else {
|
||||
@@ -4966,7 +4968,7 @@ void genX(CmdDrawIndirectCount)(
|
||||
|
||||
#if GFX_HAS_GENERATED_CMDS
|
||||
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
||||
genX(cmd_buffer_emit_indirect_generated_draws_count)(
|
||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||
cmd_buffer,
|
||||
indirect_data_address,
|
||||
stride,
|
||||
@@ -5023,7 +5025,7 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||
|
||||
#if GFX_HAS_GENERATED_CMDS
|
||||
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
||||
genX(cmd_buffer_emit_indirect_generated_draws_count)(
|
||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||
cmd_buffer,
|
||||
indirect_data_address,
|
||||
stride,
|
||||
|
||||
@@ -176,6 +176,34 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_WM), wm);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
|
||||
intel_set_ps_dispatch_state(&ps, device->info, prog_data,
|
||||
1 /* rasterization_samples */,
|
||||
0 /* msaa_flags */);
|
||||
|
||||
ps.VectorMaskEnable = prog_data->uses_vmask;
|
||||
|
||||
ps.BindingTableEntryCount = 0;
|
||||
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||
prog_data->base.ubo_ranges[0].length;
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
|
||||
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
|
||||
}
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
|
||||
psx.PixelShaderValid = true;
|
||||
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
|
||||
@@ -272,16 +300,7 @@ genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer,
|
||||
});
|
||||
}
|
||||
|
||||
static struct anv_state
|
||||
genX(cmd_buffer_alloc_generated_push_data)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
return anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
sizeof(struct anv_generate_indirect_params),
|
||||
ANV_UBO_ALIGNMENT);
|
||||
}
|
||||
|
||||
|
||||
static struct anv_state
|
||||
static void
|
||||
genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_state push_data_state)
|
||||
{
|
||||
@@ -313,67 +332,45 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
|
||||
c.ConstantBody.Buffer[0] = push_data_addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
return push_data_state;
|
||||
}
|
||||
|
||||
static struct anv_generate_indirect_params *
|
||||
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address generated_cmds_addr,
|
||||
uint32_t draw_cmd_stride,
|
||||
struct anv_address indirect_data_addr,
|
||||
uint32_t indirect_data_stride,
|
||||
uint32_t item_base,
|
||||
uint32_t item_count,
|
||||
struct anv_address count_addr,
|
||||
uint32_t max_count,
|
||||
bool indexed)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||
const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel;
|
||||
const struct brw_wm_prog_data *prog_data =
|
||||
brw_wm_prog_data_const(draw_kernel->prog_data);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
|
||||
intel_set_ps_dispatch_state(&ps, device->info, prog_data,
|
||||
1 /* rasterization_samples */,
|
||||
0 /* msaa_flags */);
|
||||
|
||||
ps.VectorMaskEnable = prog_data->uses_vmask;
|
||||
|
||||
ps.BindingTableEntryCount = 0;
|
||||
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||
prog_data->base.ubo_ranges[0].length;
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
|
||||
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
|
||||
}
|
||||
|
||||
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
|
||||
|
||||
struct anv_state push_data_state =
|
||||
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
sizeof(struct anv_generate_indirect_params),
|
||||
ANV_UBO_ALIGNMENT);
|
||||
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
||||
struct anv_generate_indirect_params *push_data = push_data_state.map;
|
||||
*push_data = (struct anv_generate_indirect_params) {
|
||||
.draw = {
|
||||
.is_indexed = indexed,
|
||||
.is_predicated = cmd_buffer->state.conditional_render_enabled,
|
||||
.flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
|
||||
(cmd_buffer->state.conditional_render_enabled ?
|
||||
ANV_GENERATED_FLAG_PREDICATED : 0) |
|
||||
((draw_cmd_stride / 4) << 16),
|
||||
.draw_base = item_base,
|
||||
.draw_count = item_count,
|
||||
.item_count = item_count,
|
||||
/* If count_addr is not NULL, we'll edit it through the command
|
||||
* streamer.
|
||||
*/
|
||||
.draw_count = anv_address_is_null(count_addr) ? max_count : 0,
|
||||
.max_draw_count = max_count,
|
||||
.instance_multiplier = pipeline->instance_multiplier,
|
||||
.indirect_data_stride = indirect_data_stride,
|
||||
},
|
||||
@@ -381,6 +378,29 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||
};
|
||||
|
||||
if (!anv_address_is_null(count_addr)) {
|
||||
/* Copy the draw count into the push constants so that the generation
|
||||
* gets the value straight away and doesn't even need to access memory.
|
||||
*/
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, batch);
|
||||
mi_memcpy(&b,
|
||||
anv_address_add((struct anv_address) {
|
||||
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
|
||||
.offset = push_data_state.offset,
|
||||
},
|
||||
offsetof(struct anv_generate_indirect_params, draw.draw_count)),
|
||||
count_addr, 4);
|
||||
|
||||
/* Make sure the memcpy landed for the generating draw call to pick up
|
||||
* the value.
|
||||
*/
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Only emit the data after the memcpy above. */
|
||||
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||
@@ -430,8 +450,8 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint64_t end_addr =
|
||||
anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
|
||||
while (params != NULL) {
|
||||
params->draw_count.end_addr_ldw = end_addr & 0xffffffff;
|
||||
params->draw_count.end_addr_udw = end_addr >> 32;
|
||||
params->draw.end_addr_ldw = end_addr & 0xffffffff;
|
||||
params->draw.end_addr_udw = end_addr >> 32;
|
||||
params = params->prev;
|
||||
}
|
||||
}
|
||||
@@ -440,7 +460,8 @@ static void
|
||||
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address indirect_data_addr,
|
||||
uint32_t indirect_data_stride,
|
||||
uint32_t draw_count,
|
||||
struct anv_address count_addr,
|
||||
uint32_t max_draw_count,
|
||||
bool indexed)
|
||||
{
|
||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||
@@ -473,179 +494,6 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||
|
||||
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
||||
|
||||
struct anv_generate_indirect_params *last_params = NULL;
|
||||
uint32_t item_base = 0;
|
||||
while (item_base < draw_count) {
|
||||
const uint32_t item_count = MIN2(draw_count - item_base,
|
||||
MAX_GENERATED_DRAW_COUNT);
|
||||
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
|
||||
|
||||
/* Ensure we have enough contiguous space for all the draws so that the
|
||||
* compute shader can edit all the 3DPRIMITIVEs from a single base
|
||||
* address.
|
||||
*
|
||||
* TODO: we might have to split that if the amount of space is too large (at
|
||||
* 1Mb?).
|
||||
*/
|
||||
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
|
||||
draw_cmd_size);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
struct anv_generate_indirect_params *params =
|
||||
genX(cmd_buffer_emit_generate_draws)(
|
||||
cmd_buffer,
|
||||
anv_batch_current_address(&cmd_buffer->batch),
|
||||
indirect_data_addr,
|
||||
indirect_data_stride,
|
||||
item_base,
|
||||
item_count,
|
||||
indexed);
|
||||
|
||||
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
|
||||
|
||||
item_base += item_count;
|
||||
|
||||
params->prev = last_params;
|
||||
last_params = params;
|
||||
}
|
||||
}
|
||||
|
||||
static struct anv_generate_indirect_params *
|
||||
genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address generated_cmds_addr,
|
||||
struct anv_address indirect_data_addr,
|
||||
uint32_t indirect_data_stride,
|
||||
uint32_t item_base,
|
||||
uint32_t item_count,
|
||||
struct anv_address count_addr,
|
||||
bool indexed)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_batch *batch = &cmd_buffer->generation_batch;
|
||||
const struct anv_shader_bin *draw_kernel =
|
||||
device->generated_draw_count_kernel;
|
||||
const struct brw_wm_prog_data *prog_data =
|
||||
brw_wm_prog_data_const(draw_kernel->prog_data);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS), ps) {
|
||||
ps.BindingTableEntryCount = 2;
|
||||
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||
prog_data->base.ubo_ranges[0].length;
|
||||
|
||||
ps._8PixelDispatchEnable = prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = prog_data->dispatch_16;
|
||||
ps._32PixelDispatchEnable = prog_data->dispatch_32;
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
|
||||
ps.KernelStartPointer0 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
ps.KernelStartPointer2 = draw_kernel->kernel.offset +
|
||||
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
|
||||
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
|
||||
}
|
||||
|
||||
genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count);
|
||||
|
||||
struct anv_state push_data_state =
|
||||
genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
|
||||
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
||||
struct anv_generate_indirect_params *push_data = push_data_state.map;
|
||||
*push_data = (struct anv_generate_indirect_params) {
|
||||
.draw_count = {
|
||||
.is_indexed = indexed,
|
||||
.is_predicated = cmd_buffer->state.conditional_render_enabled,
|
||||
.draw_base = item_base,
|
||||
.item_count = item_count,
|
||||
.draw_count = 0, // Edit this through a the command streamer
|
||||
.instance_multiplier = pipeline->instance_multiplier,
|
||||
.indirect_data_stride = indirect_data_stride,
|
||||
},
|
||||
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||
};
|
||||
|
||||
/* Copy the draw count into the push constants so that the generation gets
|
||||
* the value straight away and doesn't even need to access memory.
|
||||
*/
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, batch);
|
||||
mi_memcpy(&b,
|
||||
anv_address_add((struct anv_address) {
|
||||
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
|
||||
.offset = push_data_state.offset,
|
||||
},
|
||||
offsetof(struct anv_generate_indirect_params, draw_count.draw_count)),
|
||||
count_addr, 4);
|
||||
/* Make sure the memcpy landed for the generating draw call to pick up the
|
||||
* value.
|
||||
*/
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
|
||||
/* Only emit the data after the memcpy above. */
|
||||
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||
prim.VertexAccessType = SEQUENTIAL;
|
||||
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||
prim.VertexCountPerInstance = 3;
|
||||
prim.InstanceCount = 1;
|
||||
}
|
||||
|
||||
return push_data;
|
||||
}
|
||||
|
||||
static void
|
||||
genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address indirect_data_addr,
|
||||
uint32_t indirect_data_stride,
|
||||
struct anv_address count_addr,
|
||||
uint32_t max_draw_count,
|
||||
bool indexed)
|
||||
{
|
||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||
|
||||
/* Apply the pipeline flush here so the indirect data is available for the
|
||||
* generation shader.
|
||||
*/
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
if (anv_address_is_null(cmd_buffer->generation_return_addr))
|
||||
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
|
||||
|
||||
/* In order to have the vertex fetch gather the data we need to have a non
|
||||
* 0 stride. It's possible to have a 0 stride given by the application when
|
||||
* draw_count is 1, but we need a correct value for the
|
||||
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
|
||||
* correctly :
|
||||
*
|
||||
* Vulkan spec, vkCmdDrawIndirect:
|
||||
*
|
||||
* "If drawCount is less than or equal to one, stride is ignored."
|
||||
*/
|
||||
assert(indirect_data_stride > 0);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
|
||||
/* Emit the 3D state in the main batch. */
|
||||
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
||||
|
||||
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
||||
|
||||
struct anv_generate_indirect_params *last_params = NULL;
|
||||
uint32_t item_base = 0;
|
||||
while (item_base < max_draw_count) {
|
||||
@@ -666,15 +514,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_
|
||||
return;
|
||||
|
||||
struct anv_generate_indirect_params *params =
|
||||
genX(cmd_buffer_emit_generate_draws_count)(
|
||||
genX(cmd_buffer_emit_generate_draws)(
|
||||
cmd_buffer,
|
||||
anv_batch_current_address(&cmd_buffer->batch),
|
||||
anv_address_add(indirect_data_addr,
|
||||
item_base * indirect_data_stride),
|
||||
draw_cmd_stride,
|
||||
indirect_data_addr,
|
||||
indirect_data_stride,
|
||||
item_base,
|
||||
item_count,
|
||||
count_addr,
|
||||
max_draw_count,
|
||||
indexed);
|
||||
|
||||
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
|
||||
|
||||
@@ -114,8 +114,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']],
|
||||
_gfx_ver = g[0]
|
||||
libanv_per_hw_ver_libs += static_library(
|
||||
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0],
|
||||
generated_draws_spv_h, generated_draws_count_spv_h],
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spv_h, ],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
||||
],
|
||||
@@ -206,7 +205,7 @@ libanv_common = static_library(
|
||||
[
|
||||
libanv_files, anv_entrypoints, sha1_h,
|
||||
gen_xml_pack, float64_spv_h,
|
||||
generated_draws_spv_h, generated_draws_count_spv_h
|
||||
generated_draws_spv_h,
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
|
||||
@@ -23,6 +23,11 @@
|
||||
|
||||
#version 450
|
||||
|
||||
#define BITFIELD_BIT(i) (1u << (i))
|
||||
|
||||
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
||||
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
||||
|
||||
/* These 2 bindings will be accessed through A64 messages */
|
||||
layout(set = 0, binding = 0, std430) buffer Storage0 {
|
||||
uint indirect_data[];
|
||||
@@ -34,24 +39,29 @@ layout(set = 0, binding = 1, std430) buffer Storage1 {
|
||||
|
||||
/* This data will be provided through push constants. */
|
||||
layout(set = 0, binding = 2) uniform block {
|
||||
uint is_indexed;
|
||||
uint is_predicated;
|
||||
uint flags;
|
||||
uint draw_base;
|
||||
uint item_count;
|
||||
uint draw_count;
|
||||
uint max_draw_count;
|
||||
uint instance_multiplier;
|
||||
uint indirect_data_stride;
|
||||
uint end_addr_ldw;
|
||||
uint end_addr_udw;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
|
||||
bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
|
||||
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
|
||||
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
|
||||
uint _3dprim_dw_size = 10;
|
||||
uint cmd_idx = uint(item_idx) * _3dprim_dw_size;
|
||||
uint cmd_idx = item_idx * _3dprim_dw_size;
|
||||
uint draw_id = draw_base + item_idx;
|
||||
|
||||
if (draw_id < draw_count) {
|
||||
if (is_indexed != 0) {
|
||||
if (is_indexed) {
|
||||
/* Loading a VkDrawIndexedIndirectCommand */
|
||||
uint index_count = indirect_data[indirect_data_offset + 0];
|
||||
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
|
||||
@@ -63,7 +73,7 @@ void main()
|
||||
3 << 27 | /* Command SubType */
|
||||
3 << 24 | /* 3D Command Opcode */
|
||||
1 << 11 | /* Extended Parameter Enable */
|
||||
is_predicated << 8 |
|
||||
uint(is_predicated) << 8 |
|
||||
8 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
|
||||
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
|
||||
@@ -85,7 +95,7 @@ void main()
|
||||
3 << 27 | /* Command SubType */
|
||||
3 << 24 | /* 3D Command Opcode */
|
||||
1 << 11 | /* Extended Parameter Enable */
|
||||
is_predicated << 8 |
|
||||
uint(is_predicated) << 8 |
|
||||
8 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = 0;
|
||||
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
|
||||
@@ -97,5 +107,15 @@ void main()
|
||||
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
|
||||
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
|
||||
}
|
||||
} else if (draw_id == draw_count && draw_id < max_draw_count) {
|
||||
/* Only write a jump forward in the batch if we have fewer elements than
|
||||
* the max draw count.
|
||||
*/
|
||||
commands[cmd_idx + 0] = (0 << 29 | /* Command Type */
|
||||
49 << 23 | /* MI Command Opcode */
|
||||
1 << 8 | /* Address Space Indicator (PPGTT) */
|
||||
1 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = end_addr_ldw;
|
||||
commands[cmd_idx + 2] = end_addr_udw;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2022 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 450
|
||||
|
||||
/* These 2 bindings will be accessed through A64 messages */
|
||||
layout(set = 0, binding = 0, std430) buffer Storage0 {
|
||||
uint indirect_data[];
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1, std430) buffer Storage1 {
|
||||
uint commands[];
|
||||
};
|
||||
|
||||
/* This data will be provided through push constants. */
|
||||
layout(set = 0, binding = 2) uniform block {
|
||||
uint is_indexed;
|
||||
uint is_predicated;
|
||||
uint draw_base;
|
||||
uint item_count;
|
||||
uint draw_count;
|
||||
uint instance_multiplier;
|
||||
uint indirect_data_stride;
|
||||
uint end_addr_ldw;
|
||||
uint end_addr_udw;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
|
||||
uint indirect_data_offset = item_idx * indirect_data_stride / 4;
|
||||
uint _3dprim_dw_size = 10;
|
||||
uint cmd_idx = item_idx * _3dprim_dw_size;
|
||||
uint draw_id = draw_base + item_idx;
|
||||
|
||||
if (draw_id < draw_count) {
|
||||
if (is_indexed != 0) {
|
||||
/* Loading a VkDrawIndexedIndirectCommand */
|
||||
uint index_count = indirect_data[indirect_data_offset + 0];
|
||||
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
|
||||
uint first_index = indirect_data[indirect_data_offset + 2];
|
||||
uint vertex_offset = indirect_data[indirect_data_offset + 3];
|
||||
uint first_instance = indirect_data[indirect_data_offset + 4];
|
||||
|
||||
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
|
||||
3 << 27 | /* Command SubType */
|
||||
3 << 24 | /* 3D Command Opcode */
|
||||
1 << 11 | /* Extended Parameter Enable */
|
||||
is_predicated << 8 |
|
||||
8 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = 1 << 8; /* Indexed */
|
||||
commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */
|
||||
commands[cmd_idx + 3] = first_index; /* Start Vertex Location */
|
||||
commands[cmd_idx + 4] = instance_count; /* Instance Count */
|
||||
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
|
||||
commands[cmd_idx + 6] = vertex_offset; /* Base Vertex Location */
|
||||
commands[cmd_idx + 7] = vertex_offset; /* gl_BaseVertex */
|
||||
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
|
||||
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
|
||||
} else {
|
||||
/* Loading a VkDrawIndirectCommand structure */
|
||||
uint vertex_count = indirect_data[indirect_data_offset + 0];
|
||||
uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
|
||||
uint first_vertex = indirect_data[indirect_data_offset + 2];
|
||||
uint first_instance = indirect_data[indirect_data_offset + 3];
|
||||
|
||||
commands[cmd_idx + 0] = (3 << 29 | /* Command Type */
|
||||
3 << 27 | /* Command SubType */
|
||||
3 << 24 | /* 3D Command Opcode */
|
||||
1 << 11 | /* Extended Parameter Enable */
|
||||
is_predicated << 8 |
|
||||
8 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = 0;
|
||||
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */
|
||||
commands[cmd_idx + 3] = first_vertex; /* Start Vertex Location */
|
||||
commands[cmd_idx + 4] = instance_count; /* Instance Count */
|
||||
commands[cmd_idx + 5] = first_instance; /* Start Instance Location */
|
||||
commands[cmd_idx + 6] = 0; /* Base Vertex Location */
|
||||
commands[cmd_idx + 7] = first_vertex; /* gl_BaseVertex */
|
||||
commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */
|
||||
commands[cmd_idx + 9] = draw_id; /* gl_DrawID */
|
||||
}
|
||||
} else if (draw_id == draw_count) {
|
||||
commands[cmd_idx + 0] = (0 << 29 | /* Command Type */
|
||||
49 << 23 | /* MI Command Opcode */
|
||||
1 << 8 | /* Address Space Indicator (PPGTT) */
|
||||
1 << 0); /* DWord Length */
|
||||
commands[cmd_idx + 1] = end_addr_ldw;
|
||||
commands[cmd_idx + 2] = end_addr_udw;
|
||||
}
|
||||
}
|
||||
@@ -44,16 +44,3 @@ generated_draws_spv_h = custom_target(
|
||||
'--stage', 'frag',
|
||||
]
|
||||
)
|
||||
|
||||
generated_draws_count_spv_h = custom_target(
|
||||
'generated_draws_count_spv.h',
|
||||
input : [glsl2spirv, 'generated_draws_count.glsl'],
|
||||
output : 'generated_draws_count_spv.h',
|
||||
command : [
|
||||
prog_python, '@INPUT@', '@OUTPUT@',
|
||||
prog_glslang,
|
||||
'--vn', 'generated_draws_count_spv_source',
|
||||
'--glsl-version', '450',
|
||||
'--stage', 'frag',
|
||||
]
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user