nir/range_analysis: look through f2f
Foz-DB Navi31:
Totals from 93 (0.12% of 80273) affected shaders:
Instrs: 123927 -> 121073 (-2.30%); split: -2.30%, +0.00%
CodeSize: 670832 -> 653332 (-2.61%); split: -2.61%, +0.00%
Latency: 337678 -> 322803 (-4.41%); split: -4.41%, +0.00%
InvThroughput: 63277 -> 61083 (-3.47%)
VClause: 460 -> 373 (-18.91%)
SClause: 2178 -> 2100 (-3.58%)
Copies: 7637 -> 7744 (+1.40%)
PreSGPRs: 4414 -> 4287 (-2.88%)
PreVGPRs: 4229 -> 4230 (+0.02%)
VALU: 77375 -> 75693 (-2.17%)
SALU: 16497 -> 16383 (-0.69%); split: -0.73%, +0.04%
VMEM: 561 -> 477 (-14.97%)
SMEM: 3197 -> 3113 (-2.63%)

Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36468>
This commit is contained in:
@@ -640,6 +640,11 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
||||
case nir_op_fceil:
|
||||
case nir_op_ftrunc:
|
||||
case nir_op_ffract:
|
||||
case nir_op_f2f16:
|
||||
case nir_op_f2f16_rtz:
|
||||
case nir_op_f2f16_rtne:
|
||||
case nir_op_f2f32:
|
||||
case nir_op_f2f64:
|
||||
case nir_op_fdot2:
|
||||
case nir_op_fdot3:
|
||||
case nir_op_fdot4:
|
||||
@@ -823,6 +828,32 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
||||
|
||||
break;
|
||||
|
||||
case nir_op_f2f16:
|
||||
case nir_op_f2f16_rtz:
|
||||
case nir_op_f2f16_rtne:
|
||||
case nir_op_f2f32:
|
||||
case nir_op_f2f64: {
|
||||
r = unpack_data(src_res[0]);
|
||||
|
||||
bool rtz = alu->op == nir_op_f2f16_rtz;
|
||||
if (alu->op != nir_op_f2f16_rtne && alu->op != nir_op_f2f16_rtz) {
|
||||
nir_shader *shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
|
||||
unsigned execution_mode = shader->info.float_controls_execution_mode;
|
||||
rtz = nir_is_rounding_mode_rtz(execution_mode, alu->def.bit_size);
|
||||
}
|
||||
|
||||
if (alu->src[0].src.ssa->bit_size > alu->def.bit_size) {
|
||||
/* Unless we are rounding towards zero, large values can create Inf. */
|
||||
if (!rtz && r.range != eq_zero)
|
||||
r.is_finite = false;
|
||||
|
||||
/* Underflow can create new zeros. */
|
||||
r.range = union_ranges(r.range, eq_zero);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_fabs:
|
||||
r = unpack_data(src_res[0]);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user