aco/ra: disable p_start_linear_vgpr allocation hint

As this is, this will become useless soon.

fossil-db (navi31):
Totals from 176 (0.22% of 79242) affected shaders:
Instrs: 101932 -> 102413 (+0.47%); split: -0.01%, +0.49%
CodeSize: 541352 -> 543256 (+0.35%); split: -0.01%, +0.36%
VGPRs: 7884 -> 7896 (+0.15%)
Latency: 588129 -> 588559 (+0.07%); split: -0.07%, +0.15%
InvThroughput: 83349 -> 83689 (+0.41%); split: -0.01%, +0.42%
Copies: 4324 -> 4691 (+8.49%)
VALU: 61431 -> 61798 (+0.60%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-01-29 17:54:34 +00:00
parent f99443a68b
commit f764f6848a
2 changed files with 15 additions and 11 deletions
@@ -2945,9 +2945,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
                  if (get_reg_specified(ctx, register_file, rc, instr, reg))
                     definition->setFixed(reg);
               }
-            } else if (instr->opcode == aco_opcode::p_parallelcopy ||
-                       (instr->opcode == aco_opcode::p_start_linear_vgpr &&
-                        !instr->operands.empty())) {
+            } else if (instr->opcode == aco_opcode::p_parallelcopy) {
               PhysReg reg = instr->operands[i].physReg();
               if (instr->operands[i].isTemp() &&
                   instr->operands[i].getTemp().type() == definition->getTemp().type() &&
@@ -61,9 +61,10 @@ BEGIN_TEST(d3d11_derivs.simple)
   //>> p_end_linear_vgpr (kill)%wqm
   pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");

-   //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x                                             ; $_
+   //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x                                         ; $_
   //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y                                         ; $_
-   //>> v_mov_b32_e32 v#ry, v#ry_tmp                                                       ; $_
+   //>> v_mov_b32_e32 v#ry_tmp2, v#ry_tmp                                                  ; $_
+   //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp2]                                   ; $_ $_
   //>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
   pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
 END_TEST
@@ -444,9 +445,11 @@ BEGIN_TEST(d3d11_derivs.cube)
   //>> p_end_linear_vgpr (kill)%wqm
   pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");

-   //>> v_cubeid_f32 v#rf, v#_, v#_, v#_                                                     ; $_ $_
-   //>> v_fmaak_f32 v#rx, v#_, v#_, 0x3fc00000                                               ; $_ $_
-   //>> v_fmaak_f32 v#ry, v#_, v#_, 0x3fc00000                                               ; $_ $_
+   //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_                                                 ; $_ $_
+   //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000                                           ; $_ $_
+   //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000                                           ; $_ $_
+   //>> v_mov_b32_e32 v#rf, v#rf_tmp                                                         ; $_
+   //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp]                                      ; $_ $_
   //; success = rx+1 == ry and rx+2 == rf
   //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
   pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
@@ -492,9 +495,12 @@ BEGIN_TEST(d3d11_derivs.cube_array)

   //>> v_rndne_f32_e32 v#rl, v#_                                                             ; $_
   //>> v_cubeid_f32 v#rf, v#_, v#_, v#_                                                      ; $_ $_
-   //>> v_fmaak_f32 v#rx, v#_, v#_, 0x3fc00000                                                ; $_ $_
-   //>> v_fmaak_f32 v#ry, v#_, v#_, 0x3fc00000                                                ; $_ $_
-   //>> v_fmamk_f32 v#rlf, v#rl, 0x41000000, v#rf                                             ; $_ $_
+   //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000                                            ; $_ $_
+   //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000                                            ; $_ $_
+   //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf                                         ; $_ $_
+   //>> v_mov_b32_e32 v#rx, v#rx_tmp                                                          ; $_
+   //>> v_mov_b32_e32 v#ry, v#ry_tmp                                                          ; $_
+   //>> v_mov_b32_e32 v#rlf, v#rlf_tmp                                                        ; $_
   //>> BB1:
   //; success = rx+1 == ry and rx+2 == rlf
   //>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_