diff --git a/src/intel/vulkan/anv_generated_indirect_draws.c b/src/intel/vulkan/anv_generated_indirect_draws.c index 528786bae60..75db7491438 100644 --- a/src/intel/vulkan/anv_generated_indirect_draws.c +++ b/src/intel/vulkan/anv_generated_indirect_draws.c @@ -32,7 +32,6 @@ #include "anv_generated_indirect_draws.h" #include "shaders/generated_draws_spv.h" -#include "shaders/generated_draws_count_spv.h" /* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global * 64bit addresses. Binding 2 is left UBO that would normally be accessed @@ -286,8 +285,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device) char name[40]; } indirect_draws_key = { .name = "anv-generated-indirect-draws", - }, indirect_draws_count_key = { - .name = "anv-generated-indirect-draws-count", }; device->generated_draw_kernel = @@ -303,7 +300,10 @@ anv_device_init_generated_indirect_draws(struct anv_device *device) sizeof(indirect_draws_key), generated_draws_spv_source, ARRAY_SIZE(generated_draws_spv_source), - 10 /* 2 * (2 loads + 3 stores) */); + 11 /* + * 2 * (2 indirect data loads + 3 3DPRIMITIVE stores) + + * 1 store (MI_BATCH_BUFFER_START) + */); } if (device->generated_draw_kernel == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -313,29 +313,6 @@ anv_device_init_generated_indirect_draws(struct anv_device *device) */ anv_shader_bin_unref(device, device->generated_draw_kernel); - device->generated_draw_count_kernel = - anv_device_search_for_kernel(device, - device->internal_cache, - &indirect_draws_count_key, - sizeof(indirect_draws_count_key), - NULL); - if (device->generated_draw_count_kernel == NULL) { - device->generated_draw_count_kernel = - compile_upload_spirv(device, - &indirect_draws_count_key, - sizeof(indirect_draws_count_key), - generated_draws_count_spv_source, - ARRAY_SIZE(generated_draws_count_spv_source), - 11 /* 2 * (3 loads + 3 stores) */); - } - if (device->generated_draw_count_kernel == NULL) - return vk_error(device, 
VK_ERROR_OUT_OF_HOST_MEMORY); - - /* The cache already has a reference and it's not going anywhere so there - * is no need to hold a second reference. - */ - anv_shader_bin_unref(device, device->generated_draw_count_kernel); - return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_generated_indirect_draws.h b/src/intel/vulkan/anv_generated_indirect_draws.h index f79c87c4f54..b0e149b775c 100644 --- a/src/intel/vulkan/anv_generated_indirect_draws.h +++ b/src/intel/vulkan/anv_generated_indirect_draws.h @@ -26,29 +26,19 @@ #include +#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) +#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) + /* This needs to match generated_draws.glsl : * * layout(set = 0, binding = 2) uniform block */ struct anv_generated_indirect_draw_params { - uint32_t is_indexed; - uint32_t is_predicated; - uint32_t draw_base; - uint32_t draw_count; - uint32_t instance_multiplier; - uint32_t indirect_data_stride; -}; - -/* This needs to match generated_draws_count.glsl : - * - * layout(set = 0, binding = 2) uniform block - */ -struct anv_generated_indirect_draw_count_params { - uint32_t is_indexed; - uint32_t is_predicated; + uint32_t flags; uint32_t draw_base; uint32_t item_count; uint32_t draw_count; + uint32_t max_draw_count; uint32_t instance_multiplier; uint32_t indirect_data_stride; uint32_t end_addr_ldw; @@ -56,10 +46,7 @@ struct anv_generated_indirect_draw_count_params { }; struct anv_generate_indirect_params { - union { - struct anv_generated_indirect_draw_params draw; - struct anv_generated_indirect_draw_count_params draw_count; - }; + struct anv_generated_indirect_draw_params draw; /* Global address of binding 0 */ uint64_t indirect_data_addr; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 89df13c7638..d1db54b13ab 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1230,7 +1230,6 @@ struct anv_device { * workaround slowness with indirect draw calls. 
*/ struct anv_shader_bin *generated_draw_kernel; - struct anv_shader_bin *generated_draw_count_kernel; const struct intel_l3_config *generated_draw_l3_config; pthread_mutex_t mutex; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 6bd119c8923..bf2d6fc54f5 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -4727,6 +4727,7 @@ void genX(CmdDrawIndirect)( cmd_buffer, anv_address_add(buffer->address, offset), MAX2(stride, sizeof(VkDrawIndirectCommand)), + ANV_NULL_ADDRESS /* count_addr */, drawCount, false /* indexed */); } else { @@ -4768,6 +4769,7 @@ void genX(CmdDrawIndexedIndirect)( cmd_buffer, anv_address_add(buffer->address, offset), MAX2(stride, sizeof(VkDrawIndexedIndirectCommand)), + ANV_NULL_ADDRESS /* count_addr */, drawCount, true /* indexed */); } else { @@ -4966,7 +4968,7 @@ void genX(CmdDrawIndirectCount)( #if GFX_HAS_GENERATED_CMDS if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) { - genX(cmd_buffer_emit_indirect_generated_draws_count)( + genX(cmd_buffer_emit_indirect_generated_draws)( cmd_buffer, indirect_data_address, stride, @@ -5023,7 +5025,7 @@ void genX(CmdDrawIndexedIndirectCount)( #if GFX_HAS_GENERATED_CMDS if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) { - genX(cmd_buffer_emit_indirect_generated_draws_count)( + genX(cmd_buffer_emit_indirect_generated_draws)( cmd_buffer, indirect_data_address, stride, diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 7cf864856e7..55c4edd4f97 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -176,6 +176,34 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(batch, GENX(3DSTATE_WM), wm); + anv_batch_emit(batch, GENX(3DSTATE_PS), ps) { + intel_set_ps_dispatch_state(&ps, device->info, prog_data, + 1 /* rasterization_samples */, 
+ 0 /* msaa_flags */); + + ps.VectorMaskEnable = prog_data->uses_vmask; + + ps.BindingTableEntryCount = 0; + ps.PushConstantEnable = prog_data->base.nr_params > 0 || + prog_data->base.ubo_ranges[0].length; + + ps.DispatchGRFStartRegisterForConstantSetupData0 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); + ps.DispatchGRFStartRegisterForConstantSetupData2 = + brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); + + ps.KernelStartPointer0 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 0); + ps.KernelStartPointer1 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 1); + ps.KernelStartPointer2 = draw_kernel->kernel.offset + + brw_wm_prog_data_prog_offset(prog_data, ps, 2); + + ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1; + } + anv_batch_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { psx.PixelShaderValid = true; psx.AttributeEnable = prog_data->num_varying_inputs > 0; @@ -272,16 +300,7 @@ genX(cmd_buffer_emit_generate_draws_vertex)(struct anv_cmd_buffer *cmd_buffer, }); } -static struct anv_state -genX(cmd_buffer_alloc_generated_push_data)(struct anv_cmd_buffer *cmd_buffer) -{ - return anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - sizeof(struct anv_generate_indirect_params), - ANV_UBO_ALIGNMENT); -} - - -static struct anv_state +static void genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer, struct anv_state push_data_state) { @@ -313,67 +332,45 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer, c.ConstantBody.Buffer[0] = push_data_addr; } #endif - - return push_data_state; } static struct anv_generate_indirect_params * genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, struct anv_address generated_cmds_addr, + uint32_t draw_cmd_stride, struct anv_address indirect_data_addr, 
uint32_t indirect_data_stride, uint32_t item_base, uint32_t item_count, + struct anv_address count_addr, + uint32_t max_count, bool indexed) { - struct anv_device *device = cmd_buffer->device; struct anv_batch *batch = &cmd_buffer->generation_batch; - const struct anv_shader_bin *draw_kernel = device->generated_draw_kernel; - const struct brw_wm_prog_data *prog_data = - brw_wm_prog_data_const(draw_kernel->prog_data); - - anv_batch_emit(batch, GENX(3DSTATE_PS), ps) { - intel_set_ps_dispatch_state(&ps, device->info, prog_data, - 1 /* rasterization_samples */, - 0 /* msaa_flags */); - - ps.VectorMaskEnable = prog_data->uses_vmask; - - ps.BindingTableEntryCount = 0; - ps.PushConstantEnable = prog_data->base.nr_params > 0 || - prog_data->base.ubo_ranges[0].length; - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); - - ps.KernelStartPointer0 = draw_kernel->kernel.offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 0); - ps.KernelStartPointer1 = draw_kernel->kernel.offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 1); - ps.KernelStartPointer2 = draw_kernel->kernel.offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 2); - - ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1; - } genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count); struct anv_state push_data_state = - genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer); + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + sizeof(struct anv_generate_indirect_params), + ANV_UBO_ALIGNMENT); struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; struct anv_generate_indirect_params *push_data = push_data_state.map; *push_data = (struct anv_generate_indirect_params) { .draw = 
{ - .is_indexed = indexed, - .is_predicated = cmd_buffer->state.conditional_render_enabled, + .flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) | + (cmd_buffer->state.conditional_render_enabled ? + ANV_GENERATED_FLAG_PREDICATED : 0) | + ((draw_cmd_stride / 4) << 16), .draw_base = item_base, - .draw_count = item_count, + .item_count = item_count, + /* If count_addr is not NULL, we'll edit it through the command + * streamer. + */ + .draw_count = anv_address_is_null(count_addr) ? max_count : 0, + .max_draw_count = max_count, .instance_multiplier = pipeline->instance_multiplier, .indirect_data_stride = indirect_data_stride, }, @@ -381,6 +378,29 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, .generated_cmds_addr = anv_address_physical(generated_cmds_addr), }; + if (!anv_address_is_null(count_addr)) { + /* Copy the draw count into the push constants so that the generation + * gets the value straight away and doesn't even need to access memory. + */ + struct mi_builder b; + mi_builder_init(&b, cmd_buffer->device->info, batch); + mi_memcpy(&b, + anv_address_add((struct anv_address) { + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = push_data_state.offset, + }, + offsetof(struct anv_generate_indirect_params, draw.draw_count)), + count_addr, 4); + + /* Make sure the memcpy landed for the generating draw call to pick up + * the value. + */ + anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + } + } + + /* Only emit the data after the memcpy above. 
*/ genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state); anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) { @@ -430,8 +450,8 @@ genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer, uint64_t end_addr = anv_address_physical(anv_batch_current_address(&cmd_buffer->batch)); while (params != NULL) { - params->draw_count.end_addr_ldw = end_addr & 0xffffffff; - params->draw_count.end_addr_udw = end_addr >> 32; + params->draw.end_addr_ldw = end_addr & 0xffffffff; + params->draw.end_addr_udw = end_addr >> 32; params = params->prev; } } @@ -440,7 +460,8 @@ static void genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer, struct anv_address indirect_data_addr, uint32_t indirect_data_stride, - uint32_t draw_count, + struct anv_address count_addr, + uint32_t max_draw_count, bool indexed) { genX(flush_pipeline_select_3d)(cmd_buffer); @@ -473,179 +494,6 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length); - struct anv_generate_indirect_params *last_params = NULL; - uint32_t item_base = 0; - while (item_base < draw_count) { - const uint32_t item_count = MIN2(draw_count - item_base, - MAX_GENERATED_DRAW_COUNT); - const uint32_t draw_cmd_size = item_count * draw_cmd_stride; - - /* Ensure we have enough contiguous space for all the draws so that the - * compute shader can edit all the 3DPRIMITIVEs from a single base - * address. - * - * TODO: we might have to split that if the amount of space is to large (at - * 1Mb?). 
- */ - VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch, - draw_cmd_size); - if (result != VK_SUCCESS) - return; - - struct anv_generate_indirect_params *params = - genX(cmd_buffer_emit_generate_draws)( - cmd_buffer, - anv_batch_current_address(&cmd_buffer->batch), - indirect_data_addr, - indirect_data_stride, - item_base, - item_count, - indexed); - - anv_batch_advance(&cmd_buffer->batch, draw_cmd_size); - - item_base += item_count; - - params->prev = last_params; - last_params = params; - } -} - -static struct anv_generate_indirect_params * -genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address generated_cmds_addr, - struct anv_address indirect_data_addr, - uint32_t indirect_data_stride, - uint32_t item_base, - uint32_t item_count, - struct anv_address count_addr, - bool indexed) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_batch *batch = &cmd_buffer->generation_batch; - const struct anv_shader_bin *draw_kernel = - device->generated_draw_count_kernel; - const struct brw_wm_prog_data *prog_data = - brw_wm_prog_data_const(draw_kernel->prog_data); - - anv_batch_emit(batch, GENX(3DSTATE_PS), ps) { - ps.BindingTableEntryCount = 2; - ps.PushConstantEnable = prog_data->base.nr_params > 0 || - prog_data->base.ubo_ranges[0].length; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - ps._32PixelDispatchEnable = prog_data->dispatch_32; - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); - - ps.KernelStartPointer0 = draw_kernel->kernel.offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 0); - ps.KernelStartPointer1 = draw_kernel->kernel.offset + - 
brw_wm_prog_data_prog_offset(prog_data, ps, 1); - ps.KernelStartPointer2 = draw_kernel->kernel.offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 2); - - ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1; - } - - genX(cmd_buffer_emit_generate_draws_vertex)(cmd_buffer, item_count); - - struct anv_state push_data_state = - genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer); - - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - - struct anv_generate_indirect_params *push_data = push_data_state.map; - *push_data = (struct anv_generate_indirect_params) { - .draw_count = { - .is_indexed = indexed, - .is_predicated = cmd_buffer->state.conditional_render_enabled, - .draw_base = item_base, - .item_count = item_count, - .draw_count = 0, // Edit this through a the command streamer - .instance_multiplier = pipeline->instance_multiplier, - .indirect_data_stride = indirect_data_stride, - }, - .indirect_data_addr = anv_address_physical(indirect_data_addr), - .generated_cmds_addr = anv_address_physical(generated_cmds_addr), - }; - - /* Copy the draw count into the push constants so that the generation gets - * the value straight away and doesn't even need to access memory. - */ - struct mi_builder b; - mi_builder_init(&b, cmd_buffer->device->info, batch); - mi_memcpy(&b, - anv_address_add((struct anv_address) { - .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, - .offset = push_data_state.offset, - }, - offsetof(struct anv_generate_indirect_params, draw_count.draw_count)), - count_addr, 4); - /* Make sure the memcpy landed for the generating draw call to pick up the - * value. - */ - anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - } - - /* Only emit the data after the memcpy above. 
*/ - genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state); - - anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) { - prim.VertexAccessType = SEQUENTIAL; - prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; - prim.VertexCountPerInstance = 3; - prim.InstanceCount = 1; - } - - return push_data; -} - -static void -genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address indirect_data_addr, - uint32_t indirect_data_stride, - struct anv_address count_addr, - uint32_t max_draw_count, - bool indexed) -{ - genX(flush_pipeline_select_3d)(cmd_buffer); - - /* Apply the pipeline flush here so the indirect data is available for the - * generation shader. - */ - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - if (anv_address_is_null(cmd_buffer->generation_return_addr)) - genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer); - - /* In order to have the vertex fetch gather the data we need to have a non - * 0 stride. It's possible to have a 0 stride given by the application when - * draw_count is 1, but we need a correct value for the - * VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this - * correctly : - * - * Vulkan spec, vkCmdDrawIndirect: - * - * "If drawCount is less than or equal to one, stride is ignored." - */ - assert(indirect_data_stride > 0); - - if (cmd_buffer->state.conditional_render_enabled) - genX(cmd_emit_conditional_render_predicate)(cmd_buffer); - - /* Emit the 3D state in the main batch. 
*/ - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); - - const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length); - struct anv_generate_indirect_params *last_params = NULL; uint32_t item_base = 0; while (item_base < max_draw_count) { @@ -666,15 +514,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_ return; struct anv_generate_indirect_params *params = - genX(cmd_buffer_emit_generate_draws_count)( + genX(cmd_buffer_emit_generate_draws)( cmd_buffer, anv_batch_current_address(&cmd_buffer->batch), - anv_address_add(indirect_data_addr, - item_base * indirect_data_stride), + draw_cmd_stride, + indirect_data_addr, indirect_data_stride, item_base, item_count, count_addr, + max_draw_count, indexed); anv_batch_advance(&cmd_buffer->batch, draw_cmd_size); diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index b65af9cc8e8..752e70c3b61 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -114,8 +114,7 @@ foreach g : [['90', ['gfx8_cmd_buffer.c']], _gfx_ver = g[0] libanv_per_hw_ver_libs += static_library( 'anv_per_hw_ver@0@'.format(_gfx_ver), - [anv_per_hw_ver_files, g[1], anv_entrypoints[0], - generated_draws_spv_h, generated_draws_count_spv_h], + [anv_per_hw_ver_files, g[1], anv_entrypoints[0], generated_draws_spv_h, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel, ], @@ -206,7 +205,7 @@ libanv_common = static_library( [ libanv_files, anv_entrypoints, sha1_h, gen_xml_pack, float64_spv_h, - generated_draws_spv_h, generated_draws_count_spv_h + generated_draws_spv_h, ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, diff --git a/src/intel/vulkan/shaders/generated_draws.glsl b/src/intel/vulkan/shaders/generated_draws.glsl index 434b4d6a0ef..4bd8fb9845e 100644 --- a/src/intel/vulkan/shaders/generated_draws.glsl +++ b/src/intel/vulkan/shaders/generated_draws.glsl @@ -23,6 
+23,11 @@ #version 450 +#define BITFIELD_BIT(i) (1u << (i)) + +#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0) +#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1) + /* These 2 bindings will be accessed through A64 messages */ layout(set = 0, binding = 0, std430) buffer Storage0 { uint indirect_data[]; @@ -34,24 +39,29 @@ layout(set = 0, binding = 1, std430) buffer Storage1 { /* This data will be provided through push constants. */ layout(set = 0, binding = 2) uniform block { - uint is_indexed; - uint is_predicated; + uint flags; uint draw_base; + uint item_count; uint draw_count; + uint max_draw_count; uint instance_multiplier; uint indirect_data_stride; + uint end_addr_ldw; + uint end_addr_udw; }; void main() { + bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0; + bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0; uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x); uint indirect_data_offset = item_idx * indirect_data_stride / 4; uint _3dprim_dw_size = 10; - uint cmd_idx = uint(item_idx) * _3dprim_dw_size; + uint cmd_idx = item_idx * _3dprim_dw_size; uint draw_id = draw_base + item_idx; if (draw_id < draw_count) { - if (is_indexed != 0) { + if (is_indexed) { /* Loading a VkDrawIndexedIndirectCommand */ uint index_count = indirect_data[indirect_data_offset + 0]; uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; @@ -63,7 +73,7 @@ void main() 3 << 27 | /* Command SubType */ 3 << 24 | /* 3D Command Opcode */ 1 << 11 | /* Extended Parameter Enable */ - is_predicated << 8 | + uint(is_predicated) << 8 | 8 << 0); /* DWord Length */ commands[cmd_idx + 1] = 1 << 8; /* Indexed */ commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */ @@ -85,7 +95,7 @@ void main() 3 << 27 | /* Command SubType */ 3 << 24 | /* 3D Command Opcode */ 1 << 11 | /* Extended Parameter Enable */ - is_predicated << 8 | + uint(is_predicated) << 8 | 8 << 0); /* DWord Length */ commands[cmd_idx + 1] = 0; 
commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */ @@ -97,5 +107,15 @@ void main() commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */ commands[cmd_idx + 9] = draw_id; /* gl_DrawID */ } + } else if (draw_id == draw_count && draw_id < max_draw_count) { + /* Only write a jump forward in the batch if we have fewer elements than + * the max draw count. + */ + commands[cmd_idx + 0] = (0 << 29 | /* Command Type */ + 49 << 23 | /* MI Command Opcode */ + 1 << 8 | /* Address Space Indicator (PPGTT) */ + 1 << 0); /* DWord Length */ + commands[cmd_idx + 1] = end_addr_ldw; + commands[cmd_idx + 2] = end_addr_udw; } } diff --git a/src/intel/vulkan/shaders/generated_draws_count.glsl b/src/intel/vulkan/shaders/generated_draws_count.glsl deleted file mode 100644 index 285da4097c6..00000000000 --- a/src/intel/vulkan/shaders/generated_draws_count.glsl +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright © 2022 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 450 - -/* These 2 bindings will be accessed through A64 messages */ -layout(set = 0, binding = 0, std430) buffer Storage0 { - uint indirect_data[]; -}; - -layout(set = 0, binding = 1, std430) buffer Storage1 { - uint commands[]; -}; - -/* This data will be provided through push constants. */ -layout(set = 0, binding = 2) uniform block { - uint is_indexed; - uint is_predicated; - uint draw_base; - uint item_count; - uint draw_count; - uint instance_multiplier; - uint indirect_data_stride; - uint end_addr_ldw; - uint end_addr_udw; -}; - -void main() -{ - uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x); - uint indirect_data_offset = item_idx * indirect_data_stride / 4; - uint _3dprim_dw_size = 10; - uint cmd_idx = item_idx * _3dprim_dw_size; - uint draw_id = draw_base + item_idx; - - if (draw_id < draw_count) { - if (is_indexed != 0) { - /* Loading a VkDrawIndexedIndirectCommand */ - uint index_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; - uint first_index = indirect_data[indirect_data_offset + 2]; - uint vertex_offset = indirect_data[indirect_data_offset + 3]; - uint first_instance = indirect_data[indirect_data_offset + 4]; - - commands[cmd_idx + 0] = (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 3 << 24 | /* 3D Command Opcode */ - 1 << 11 | /* Extended Parameter Enable */ - is_predicated << 8 | - 8 << 0); /* DWord Length */ - commands[cmd_idx + 1] = 1 << 8; /* Indexed */ - commands[cmd_idx + 2] = index_count; /* Vertex Count Per Instance */ - commands[cmd_idx + 3] = first_index; /* Start Vertex Location */ - commands[cmd_idx + 4] = instance_count; /* 
Instance Count */ - commands[cmd_idx + 5] = first_instance; /* Start Instance Location */ - commands[cmd_idx + 6] = vertex_offset; /* Base Vertex Location */ - commands[cmd_idx + 7] = vertex_offset; /* gl_BaseVertex */ - commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */ - commands[cmd_idx + 9] = draw_id; /* gl_DrawID */ - } else { - /* Loading a VkDrawIndirectCommand structure */ - uint vertex_count = indirect_data[indirect_data_offset + 0]; - uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier; - uint first_vertex = indirect_data[indirect_data_offset + 2]; - uint first_instance = indirect_data[indirect_data_offset + 3]; - - commands[cmd_idx + 0] = (3 << 29 | /* Command Type */ - 3 << 27 | /* Command SubType */ - 3 << 24 | /* 3D Command Opcode */ - 1 << 11 | /* Extended Parameter Enable */ - is_predicated << 8 | - 8 << 0); /* DWord Length */ - commands[cmd_idx + 1] = 0; - commands[cmd_idx + 2] = vertex_count; /* Vertex Count Per Instance */ - commands[cmd_idx + 3] = first_vertex; /* Start Vertex Location */ - commands[cmd_idx + 4] = instance_count; /* Instance Count */ - commands[cmd_idx + 5] = first_instance; /* Start Instance Location */ - commands[cmd_idx + 6] = 0; /* Base Vertex Location */ - commands[cmd_idx + 7] = first_vertex; /* gl_BaseVertex */ - commands[cmd_idx + 8] = first_instance; /* gl_BaseInstance */ - commands[cmd_idx + 9] = draw_id; /* gl_DrawID */ - } - } else if (draw_id == draw_count) { - commands[cmd_idx + 0] = (0 << 29 | /* Command Type */ - 49 << 23 | /* MI Command Opcode */ - 1 << 8 | /* Address Space Indicator (PPGTT) */ - 1 << 0); /* DWord Length */ - commands[cmd_idx + 1] = end_addr_ldw; - commands[cmd_idx + 2] = end_addr_udw; - } -} diff --git a/src/intel/vulkan/shaders/meson.build b/src/intel/vulkan/shaders/meson.build index 9ed504c70d2..dae265e7278 100644 --- a/src/intel/vulkan/shaders/meson.build +++ b/src/intel/vulkan/shaders/meson.build @@ -44,16 +44,3 @@ generated_draws_spv_h = 
custom_target( '--stage', 'frag', ] ) - -generated_draws_count_spv_h = custom_target( - 'generated_draws_count_spv.h', - input : [glsl2spirv, 'generated_draws_count.glsl'], - output : 'generated_draws_count_spv.h', - command : [ - prog_python, '@INPUT@', '@OUTPUT@', - prog_glslang, - '--vn', 'generated_draws_count_spv_source', - '--glsl-version', '450', - '--stage', 'frag', - ] -)