diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 0126cd9b93f..3e13835100d 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -928,78 +928,33 @@ ldvary_sequence_inst(struct v3d_compile *c, struct qreg result) (struct qinst *) c->cur_block->instructions.prev; assert(producer); producer->is_ldvary_sequence = true; - c->ldvary_sequence_end_inst = producer; return result; } -static void -track_ldvary_pipelining(struct v3d_compile *c, struct qinst *ldvary) -{ - if (ldvary) { - ldvary->is_ldvary_sequence = true; - c->ldvary_sequence_length++; - if (c->ldvary_sequence_length == 1) { - ldvary->ldvary_pipelining_start = true; - c->ldvary_sequence_start_inst = ldvary; - } - } -} - static struct qreg emit_smooth_varying(struct v3d_compile *c, - struct qinst *ldvary, struct qreg vary, struct qreg w, struct qreg r5) { - track_ldvary_pipelining(c, ldvary); return ldvary_sequence_inst(c, vir_FADD(c, ldvary_sequence_inst(c, vir_FMUL(c, vary, w)), r5)); } static struct qreg emit_noperspective_varying(struct v3d_compile *c, - struct qinst *ldvary, struct qreg vary, struct qreg r5) { - track_ldvary_pipelining(c, ldvary); return ldvary_sequence_inst(c, vir_FADD(c, ldvary_sequence_inst(c, vir_MOV(c, vary)), r5)); } static struct qreg emit_flat_varying(struct v3d_compile *c, - struct qinst *ldvary, struct qreg vary, struct qreg r5) { - track_ldvary_pipelining(c, ldvary); vir_MOV_dest(c, c->undef, vary); return ldvary_sequence_inst(c, vir_MOV(c, r5)); } -static void -varying_sequence_end(struct v3d_compile *c) -{ - if (!c->ldvary_sequence_start_inst) { - assert(!c->ldvary_sequence_end_inst); - assert(c->ldvary_sequence_length == 0); - return; - } - - assert(c->ldvary_sequence_start_inst); - assert(c->ldvary_sequence_end_inst); - assert(c->ldvary_sequence_start_inst != c->ldvary_sequence_end_inst); - - /* We need at least two ldvary sequences to do some pipelining */ - if (c->ldvary_sequence_length == 1) - c->ldvary_sequence_start_inst->ldvary_pipelining_start = false; - - if (c->ldvary_sequence_length > 1) - c->ldvary_sequence_end_inst->ldvary_pipelining_end = true; - - c->ldvary_sequence_length = 0; - c->ldvary_sequence_start_inst = NULL; - c->ldvary_sequence_end_inst = NULL; -} - static struct qreg emit_fragment_varying(struct v3d_compile *c, nir_variable *var, int8_t input_idx, uint8_t swizzle, int array_index) @@ -1013,6 +968,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, c->undef, c->undef); ldvary->qpu.sig.ldvary = true; + ldvary->is_ldvary_sequence = true; vary = vir_emit_def(c, ldvary); } else { vir_NOP(c)->qpu.sig.ldvary = true; @@ -1035,7 +991,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, */ if (!var) { assert(input_idx < 0); - return emit_smooth_varying(c, ldvary, vary, c->payload_w, r5); + return emit_smooth_varying(c, vary, c->payload_w, r5); } int i = c->num_inputs++; @@ -1049,22 +1005,21 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, case INTERP_MODE_SMOOTH: if (var->data.centroid) { BITSET_SET(c->centroid_flags, i); - result = emit_smooth_varying(c, ldvary, vary, + result = emit_smooth_varying(c, vary, c->payload_w_centroid, r5); } else { - result = emit_smooth_varying(c, ldvary, vary, - c->payload_w, r5); + result = emit_smooth_varying(c, vary, c->payload_w, r5); } break; case INTERP_MODE_NOPERSPECTIVE: BITSET_SET(c->noperspective_flags, i); - result = emit_noperspective_varying(c, ldvary, vary, r5); + result = emit_noperspective_varying(c, vary, r5); break; case INTERP_MODE_FLAT: BITSET_SET(c->flat_shade_flags, i); - result = emit_flat_varying(c, ldvary, vary, r5); + result = emit_flat_varying(c, vary, r5); break; default: @@ -2099,8 +2054,6 @@ ntq_setup_fs_inputs(struct v3d_compile *c) } } } - - varying_sequence_end(c); } static void diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index a40d4b1aef2..90dba7f48b0 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -461,6 +461,7 @@ struct choose_scoreboard { bool tlb_locked; bool ldvary_pipelining; bool fixup_ldvary; + int ldvary_count; }; static bool @@ -875,7 +876,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, } static struct schedule_node * -choose_instruction_to_schedule(const struct v3d_device_info *devinfo, +choose_instruction_to_schedule(struct v3d_compile *c, struct choose_scoreboard *scoreboard, struct schedule_node *prev_inst) { @@ -900,12 +901,6 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, continue; } - /* Sanity check: if we are scheduling a smooth ldvary sequence - * we cannot be starting another sequence in the middle of it. - */ - assert(!scoreboard->ldvary_pipelining || - !n->inst->ldvary_pipelining_start); - const struct v3d_qpu_instr *inst = &n->inst->qpu; /* Simulator complains if we have two uniforms loaded in the @@ -947,7 +942,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, if (reads_too_soon_after_write(scoreboard, n->inst)) continue; - if (writes_too_soon_after_write(devinfo, scoreboard, n->inst)) + if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst)) continue; /* "A scoreboard wait must not occur in the first two @@ -991,7 +986,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, continue; struct v3d_qpu_instr merged_inst; - if (!qpu_merge_inst(devinfo, &merged_inst, + if (!qpu_merge_inst(c->devinfo, &merged_inst, &prev_inst->inst->qpu, inst)) { continue; } @@ -1002,12 +997,13 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, */ if (scoreboard->ldvary_pipelining && inst->sig.ldvary) { assert(n->inst->is_ldvary_sequence); + scoreboard->ldvary_count++; scoreboard->fixup_ldvary = true; return n; } } - int prio = get_instruction_priority(devinfo, inst); + int prio = get_instruction_priority(c->devinfo, inst); if (mux_read_stalls(scoreboard, inst)) { /* Don't merge an instruction that stalls */ @@ -1045,39 +1041,49 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, } } - /* If we are in the middle of an ldvary sequence we only pick up - * instructions that can continue the sequence so we can pipeline - * them, however, if we failed to find anything to schedule then we - * can't possibly continue the sequence and we need to stop the - * pipelining process and try again. - * - * There is one exception to the above: noperspective or flat - * varyings can cause us to not be able to pick an instruction - * because they need a nop between the ldvary and the next instruction - * to account for the ldvary r5 write latency. We can try to detect this - * by checking if we are also unable to schedule an instruction after - * disabling pipelining. - * - * FIXME: dropping pipelining and picking up another instruction could - * break the sequence for flat/noperspective varyings we could've been - * able to continue if we returned NULL here and scheduled a NOP as a - * result, but detecting this case would require us to know in advance - * that emitting the next NOP will guarantee that we will be able to - * continue the sequence. - */ - if (scoreboard->ldvary_pipelining && !prev_inst && !chosen) { - scoreboard->ldvary_pipelining = false; - chosen = choose_instruction_to_schedule(devinfo, scoreboard, - prev_inst); - scoreboard->ldvary_pipelining = !chosen; - } else if (chosen) { - if (scoreboard->ldvary_pipelining) { - assert(chosen->inst->is_ldvary_sequence); + /* Update ldvary pipelining state */ + if (chosen) { + if (chosen->inst->qpu.sig.ldvary && + chosen->inst->is_ldvary_sequence) { scoreboard->ldvary_pipelining = - !chosen->inst->ldvary_pipelining_end; - } else if (chosen->inst->ldvary_pipelining_start) { - assert(chosen->inst->qpu.sig.ldvary); - scoreboard->ldvary_pipelining = true; + c->num_inputs > ++scoreboard->ldvary_count; + } + } else if (scoreboard->ldvary_pipelining) { + /* If we are in the middle of an ldvary sequence we only pick + * up instructions that can continue the sequence so we can + * pipeline them, however, if we failed to find anything to + * schedule (!prev_inst) then we can't possibly continue the + * sequence and we need to stop the pipelining process and try + * again. + * + * There is one exception to the above: noperspective or flat + * varyings can cause us to not be able to pick an instruction + * because they need a nop between the ldvary and the next + * instruction to account for the ldvary r5 write latency. We + * can try to detect this by checking if we are also unable to + * schedule an instruction after disabling pipelining. + * + * FIXME: dropping pipelining and picking up another instruction + * could break the sequence for flat/noperspective varyings we + * could've been able to continue if we returned NULL here and + * scheduled a NOP as a result, but detecting this case would + * require us to know in advance that emitting the next NOP will + * guarantee that we will be able to continue the sequence. + * + * If we failed to pair up (prev_inst != NULL), then we disable + * pipelining if we have already scheduled the last ldvary. This + * may allow any other instruction that is not part of an ldvary + * sequence to be merged into the last instruction of the last + * ldvary sequence for optimal results. + */ + if (!prev_inst) { + scoreboard->ldvary_pipelining = false; + chosen = choose_instruction_to_schedule(c, scoreboard, + prev_inst); + scoreboard->ldvary_pipelining = !chosen; + } else { + scoreboard->ldvary_pipelining = + c->num_inputs > scoreboard->ldvary_count; } } @@ -1667,9 +1673,7 @@ schedule_instructions(struct v3d_compile *c, while (!list_is_empty(&scoreboard->dag->heads)) { struct schedule_node *chosen = - choose_instruction_to_schedule(devinfo, - scoreboard, - NULL); + choose_instruction_to_schedule(c, scoreboard, NULL); struct schedule_node *merge = NULL; /* If there are no valid instructions to schedule, drop a NOP @@ -1702,8 +1706,7 @@ schedule_instructions(struct v3d_compile *c, pre_remove_head(scoreboard->dag, chosen); while ((merge = - choose_instruction_to_schedule(devinfo, - scoreboard, + choose_instruction_to_schedule(c, scoreboard, chosen))) { time = MAX2(merge->unblocked_time, time); pre_remove_head(scoreboard->dag, merge); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index ed8747d11a3..fafdf5a208f 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -165,14 +165,6 @@ struct qinst { /* Set if this instruction participates in a varying setup. */ bool is_ldvary_sequence; - /* Set if this is the ldvary instruction starting a sequence of - * varyings we want to pipeline. - */ - bool ldvary_pipelining_start; - /* Set if this is the last instruction involved with a pipelineable - * varying sequence. - */ - bool ldvary_pipelining_end; }; enum quniform_contents { @@ -780,11 +772,6 @@ struct v3d_compile { uint32_t program_id; uint32_t variant_id; - /* Used to track pipelinable sequences of varyings */ - struct qinst *ldvary_sequence_start_inst; - struct qinst *ldvary_sequence_end_inst; - uint32_t ldvary_sequence_length; - /* Set to compile program in in 1x, 2x, or 4x threaded mode, where * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of * limiting ourselves to the part of the physical reg space.