vc4: Try to fix the HW-2116 workaround.
We were incrementing the count at the end of vc4_start_draw(), but that function returns immediately if we've already started drawing on this batch, so subsequent draws in the same batch were not counted. The count also missed the state changes from the GFXH-515 workaround. This incidentally allows repeated glClear() calls to be coalesced, because the fast clears aren't counted in draw_calls_queued any more. Fixes most of the extra flushes in Processing, which emits glClear(Z|S); glClear(Z); glClear(C) during its frame setup. Improves performance of Processing's QuadRendering demo at 5000 quads by 3.33538% +/- 2.05846% (n=21 before, 15 after).
This commit is contained in:
@@ -32,6 +32,8 @@
|
||||
#include "vc4_context.h"
|
||||
#include "vc4_resource.h"
|
||||
|
||||
#define VC4_HW_2116_COUNT 0x1ef0
|
||||
|
||||
static void
|
||||
vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
|
||||
{
|
||||
@@ -108,7 +110,6 @@ vc4_start_draw(struct vc4_context *vc4)
|
||||
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
|
||||
|
||||
job->needs_flush = true;
|
||||
job->draw_calls_queued++;
|
||||
job->draw_width = vc4->framebuffer.width;
|
||||
job->draw_height = vc4->framebuffer.height;
|
||||
|
||||
@@ -264,12 +265,12 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
|
||||
* tiles with VC4_PACKET_RETURN_FROM_LIST.
|
||||
*/
|
||||
static void
|
||||
vc4_hw_2116_workaround(struct pipe_context *pctx)
|
||||
vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count)
|
||||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
struct vc4_job *job = vc4_get_job_for_fbo(vc4);
|
||||
|
||||
if (job->draw_calls_queued == 0x1ef0) {
|
||||
if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) {
|
||||
perf_debug("Flushing batch due to HW-2116 workaround "
|
||||
"(too many draw calls per scene\n");
|
||||
vc4_job_submit(vc4, job);
|
||||
@@ -294,7 +295,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
vc4_predraw_check_textures(pctx, &vc4->verttex);
|
||||
vc4_predraw_check_textures(pctx, &vc4->fragtex);
|
||||
|
||||
vc4_hw_2116_workaround(pctx);
|
||||
vc4_hw_2116_workaround(pctx, info->count);
|
||||
|
||||
struct vc4_job *job = vc4_get_job_for_fbo(vc4);
|
||||
|
||||
@@ -308,7 +309,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
vc4_start_draw(vc4);
|
||||
vc4_update_compiled_shaders(vc4, info->mode);
|
||||
|
||||
uint32_t start_draw_calls_queued = job->draw_calls_queued;
|
||||
vc4_emit_state(pctx);
|
||||
|
||||
if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
|
||||
@@ -362,6 +362,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
cl_u32(&bcl, info->count);
|
||||
cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
|
||||
cl_u32(&bcl, vc4->max_index);
|
||||
job->draw_calls_queued++;
|
||||
|
||||
if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
|
||||
pipe_resource_reference(&prsc, NULL);
|
||||
@@ -430,6 +431,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
cl_u8(&bcl, info->mode);
|
||||
cl_u32(&bcl, this_count);
|
||||
cl_u32(&bcl, start);
|
||||
job->draw_calls_queued++;
|
||||
|
||||
count -= step;
|
||||
extra_index_bias += start + step;
|
||||
@@ -438,11 +440,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
}
|
||||
cl_end(&job->bcl, bcl);
|
||||
|
||||
/* No flushes of the job should have happened between when we started
|
||||
* emitting state for our draw and when we just emitted our draw's
|
||||
* primitives.
|
||||
/* We shouldn't have tripped the HW_2116 bug with the GFXH-515
|
||||
* workaround.
|
||||
*/
|
||||
assert(start_draw_calls_queued == job->draw_calls_queued);
|
||||
assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);
|
||||
|
||||
if (vc4->zsa && vc4->framebuffer.zsbuf) {
|
||||
struct vc4_resource *rsc =
|
||||
|
||||
Reference in New Issue
Block a user