i965/fs: Make emit_shader_time_end() insert before EOT.
Previously, we emitted the shader-time epilogue from emit_fb_writes(), during the middle of looping through color regions (or emit_urb_writes for the VS). This is duplicated several times and rather awkward. I need to fix a bug in our FB write handling, and it will be a lot easier if we move emit_shader_time_end() out of there. Now, we simply emit FB writes/URB writes, and subsequently have emit_shader_time_end() insert instructions before the final SEND with EOT. Not only is this simpler, it's actually a slight improvement: we now include the MOVs to set up the final FB write payload in our shader-time measurements. Note that INTEL_DEBUG=shader_time only exists on Gen7+, and uses send-from-GRF. (In the past, we might have hit trouble where both attempt to use MRFs for messages; that's not a problem now.) v2: Rebase on v3 of the previous patch and other shader_time fixes. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> [v1] Acked-by: Matt Turner <mattst88@gmail.com> Cc: mesa-stable@lists.freedesktop.org
This commit is contained in:
@@ -759,19 +759,24 @@ fs_visitor::emit_shader_time_end()
|
||||
unreachable("fs_visitor::emit_shader_time_end missing code");
|
||||
}
|
||||
|
||||
/* Insert our code just before the final SEND with EOT. */
|
||||
exec_node *end = this->instructions.get_tail();
|
||||
assert(end && ((fs_inst *) end)->eot);
|
||||
|
||||
fs_inst *tm_read;
|
||||
fs_reg shader_end_time = get_timestamp(&tm_read);
|
||||
emit(tm_read);
|
||||
end->insert_before(tm_read);
|
||||
|
||||
/* Check that there weren't any timestamp reset events (assuming these
|
||||
* were the only two timestamp reads that happened).
|
||||
*/
|
||||
fs_reg reset = shader_end_time;
|
||||
reset.set_smear(2);
|
||||
fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
|
||||
fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
|
||||
test->conditional_mod = BRW_CONDITIONAL_Z;
|
||||
test->force_writemask_all = true;
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
end->insert_before(test);
|
||||
end->insert_before(IF(BRW_PREDICATE_NORMAL));
|
||||
|
||||
fs_reg start = shader_start_time;
|
||||
start.negate = true;
|
||||
@@ -779,7 +784,7 @@ fs_visitor::emit_shader_time_end()
|
||||
diff.set_smear(0);
|
||||
fs_inst *add = ADD(diff, start, shader_end_time);
|
||||
add->force_writemask_all = true;
|
||||
emit(add);
|
||||
end->insert_before(add);
|
||||
|
||||
/* If there were no instructions between the two timestamp gets, the diff
|
||||
* is 2 cycles. Remove that overhead, so I can forget about that when
|
||||
@@ -787,13 +792,13 @@ fs_visitor::emit_shader_time_end()
|
||||
*/
|
||||
add = ADD(diff, diff, fs_reg(-2u));
|
||||
add->force_writemask_all = true;
|
||||
emit(add);
|
||||
end->insert_before(add);
|
||||
|
||||
emit(SHADER_TIME_ADD(type, diff));
|
||||
emit(SHADER_TIME_ADD(written_type, fs_reg(1u)));
|
||||
emit(BRW_OPCODE_ELSE);
|
||||
emit(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
end->insert_before(SHADER_TIME_ADD(type, diff));
|
||||
end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
|
||||
end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
|
||||
end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
|
||||
end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
|
||||
}
|
||||
|
||||
fs_inst *
|
||||
@@ -3922,6 +3927,9 @@ fs_visitor::run_fs()
|
||||
|
||||
emit_fb_writes();
|
||||
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
emit_shader_time_end();
|
||||
|
||||
calculate_cfg();
|
||||
|
||||
optimize();
|
||||
|
||||
@@ -3670,9 +3670,6 @@ fs_visitor::emit_fb_writes()
|
||||
|
||||
fs_inst *inst;
|
||||
if (do_dual_src) {
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
emit_shader_time_end();
|
||||
|
||||
this->current_annotation = ralloc_asprintf(this->mem_ctx,
|
||||
"FB dual-source write");
|
||||
inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
|
||||
@@ -3712,19 +3709,12 @@ fs_visitor::emit_fb_writes()
|
||||
if (brw->gen >= 6 && key->replicate_alpha && target != 0)
|
||||
src0_alpha = offset(outputs[0], 3);
|
||||
|
||||
if (target == key->nr_color_regions - 1 &&
|
||||
(INTEL_DEBUG & DEBUG_SHADER_TIME))
|
||||
emit_shader_time_end();
|
||||
|
||||
inst = emit_single_fb_write(this->outputs[target], reg_undef,
|
||||
src0_alpha,
|
||||
this->output_components[target]);
|
||||
inst->target = target;
|
||||
}
|
||||
} else {
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
emit_shader_time_end();
|
||||
|
||||
/* Even if there's no color buffers enabled, we still need to send
|
||||
* alpha out the pipeline to our null renderbuffer to support
|
||||
* alpha-testing, alpha-to-coverage, and so on.
|
||||
@@ -3935,9 +3925,6 @@ fs_visitor::emit_urb_writes()
|
||||
if (length == 8 || last)
|
||||
flush = true;
|
||||
if (flush) {
|
||||
if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME))
|
||||
emit_shader_time_end();
|
||||
|
||||
fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
|
||||
fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
|
||||
Reference in New Issue
Block a user