Missing patch from Ben Skeggs:

Lots of changes, and fixes for some badness on my part.

1. Disposable data used during fp compile is now per-context
   rather than per-program, to save memory.

2. Track usage of INPUT/TEMP registers from Mesa program, free
   them when no longer required so the hw temps can be re-used.

3. Changed LAST_NODE to OUTPUT_COLOR (see r300_reg.h)

4. Implemented remaining ARB_f_p instructions, with the exception
   of the trig/LIT opcodes.

5. Treat ZERO/ONE swizzles the same way as other native swizzles.

6. emit_arith changes, basically a complete rewrite.  It should
   produce cleaner instructions, but with no real functional changes.
   The internal reg -> hw reg routines are now shared with emit_tex.
   Still a bit messy.
This commit is contained in:
Aapo Tahkola
2006-01-09 22:53:38 +00:00
parent c217d5a3a8
commit 2d4ff6a8cf
6 changed files with 867 additions and 455 deletions
+37 -10
View File
@@ -85,7 +85,6 @@ typedef struct r300_context *r300ContextPtr;
typedef GLuint uint32_t;
typedef GLubyte uint8_t;
struct r300_fragment_program;
/* We should probably change types within vertex_shader
and pixel_shader structure later on */
@@ -613,12 +612,46 @@ struct r300_vertex_program {
#define PFS_MAX_TEX_INDIRECT 4
#define PFS_NUM_TEMP_REGS 32
#define PFS_NUM_CONST_REGS 32
/* Tracking data for a single Mesa register.
 *
 * Each entry records the hardware temp assigned to the Mesa register and
 * the number of uses of that register by the Mesa program.  Arrays of
 * these entries are kept for both temps and inputs in
 * struct r300_pfs_compile_state.
 *
 * NOTE(review): presumably the refcount is decremented as uses are emitted
 * so that the hardware temp can be released and re-used once it reaches
 * zero -- confirm against the fragment program compiler.
 */
struct reg_acc {
int reg; /* Assigned hw temp */
unsigned int refcount; /* Number of uses by mesa program */
};
/* Scratch state used while compiling a fragment program.
 *
 * Holds the ALU slot positions, per-slot bookkeeping and the mapping of
 * Mesa registers onto hardware temps.  struct r300_fragment_program refers
 * to one of these through its `cs` pointer, and struct r300_state embeds
 * one as `pfs_compile` when USE_ARB_F_P is non-zero.
 */
struct r300_pfs_compile_state {
int v_pos, s_pos; /* highest ALU slots used */
/* Track some information gathered during opcode
 * construction, one entry per ALU instruction slot.
 *
 * NOTE: Data is only set by the code, and isn't used yet.
 */
struct {
int vsrc[3]; /* presumably the vector-half source registers -- TODO confirm */
int ssrc[3]; /* presumably the scalar-half source registers -- TODO confirm */
int umask; /* NOTE(review): looks like a usage/write mask -- confirm */
} slot[PFS_MAX_ALU_INST];
/* Used to map Mesa's inputs/temps onto hardware temps */
int temp_in_use; /* NOTE(review): presumably a bitmask of allocated temps -- confirm */
struct reg_acc temps[PFS_NUM_TEMP_REGS];
struct reg_acc inputs[32]; /* don't actually need 32... */
/* Track usage of hardware temps, for register allocation,
 * indirection detection, etc. */
int hwreg_in_use;
GLuint used_in_node; /* NOTE(review): presumably hw temps read in the current node -- confirm */
GLuint dest_in_node; /* NOTE(review): presumably hw temps written in the current node -- confirm */
};
struct r300_fragment_program {
struct fragment_program mesa_program;
GLcontext *ctx;
GLboolean translated;
GLboolean error;
struct r300_pfs_compile_state *cs;
struct {
int length;
@@ -633,14 +666,13 @@ struct r300_fragment_program {
GLuint inst3;
} inst[PFS_MAX_ALU_INST];
} alu;
int v_pos;
int s_pos;
struct {
int tex_offset;
int tex_end;
int alu_offset;
int alu_end;
int flags;
} node[4];
int cur_node;
int first_node_has_tex;
@@ -661,14 +693,7 @@ struct r300_fragment_program {
} param[PFS_NUM_CONST_REGS];
int param_nr;
GLboolean params_uptodate;
GLuint temps[PFS_NUM_TEMP_REGS];
int temp_in_use;
GLuint used_in_node;
GLuint dest_in_node;
GLuint inputs[32]; /* don't actually need 32... */
int hwreg_in_use;
int max_temp_idx;
};
@@ -794,6 +819,8 @@ struct r300_state {
struct r300_vertex_shader_state vertex_shader;
#if USE_ARB_F_P == 0
struct r300_pixel_shader_state pixel_shader;
#else
struct r300_pfs_compile_state pfs_compile;
#endif
struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
int aos_count;
File diff suppressed because it is too large Load Diff
+30 -3
View File
@@ -21,6 +21,8 @@ typedef struct _pfs_reg_t {
GLuint v_swz:5;
GLuint s_swz:5;
GLuint negate:1; //XXX: we need to handle negate individually
GLuint absolute:1;
GLboolean no_use:1;
GLboolean valid:1;
} pfs_reg_t;
@@ -38,12 +40,37 @@ typedef struct _pfs_reg_t {
#define PFS_OP_RSQ 10
#define PFS_OP_REPL_ALPHA 11
#define MAX_PFS_OP 11
#define OP(n) PFS_OP_##n
#define PFS_FLAG_SAT (1 << 0)
#define PFS_FLAG_ABS (1 << 1)
extern void translate_fragment_shader(struct r300_fragment_program *rp);
/* Argument descriptor layout: two flag bits (5 and 6) above the low bits,
 * with ARG_MASK covering all 7 bits and ARG_STRIDE giving the width of one
 * descriptor.
 * NOTE(review): the names suggest negate / absolute-value modifiers --
 * confirm against the users of these macros. */
#define ARG_NEG (1 << 5)
#define ARG_ABS (1 << 6)
#define ARG_MASK (127 << 0)
#define ARG_STRIDE 7
/* Source descriptor layout: SRC_CONST is flag bit 5, SRC_MASK covers the
 * low 6 bits and SRC_STRIDE gives the width of one descriptor.
 * NOTE(review): SRC_CONST presumably selects a constant register rather
 * than a temp -- confirm. */
#define SRC_CONST (1 << 5)
#define SRC_MASK (63 << 0)
#define SRC_STRIDE 6
#endif /* __R300_FRAGPROG_H_ */
/* First word of a no-op ALU instruction: a MAD output operation with all
 * three arguments set to ZERO.
 * NOTE(review): the "C" suffix presumably denotes the colour (XYZ) half of
 * the instruction -- confirm against r300_reg.h. */
#define NOP_INST0 ( \
(R300_FPI0_OUTC_MAD) | \
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT))
/* Second word of a no-op ALU instruction: all three source fields are set
 * to index 0 with the SRC_CONST flag.  No destination or write-mask bits
 * are set. */
#define NOP_INST1 ( \
((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \
((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \
((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT))
/* Third word of a no-op ALU instruction: a MAD output operation with all
 * three arguments set to ZERO.
 * NOTE(review): the "A" suffix presumably denotes the alpha half of the
 * instruction -- confirm against r300_reg.h. */
#define NOP_INST2 ( \
(R300_FPI2_OUTA_MAD) | \
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT))
/* Fourth word of a no-op ALU instruction: all three source fields are set
 * to index 0 with the SRC_CONST flag.  No destination bits are set. */
#define NOP_INST3 ( \
((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \
((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \
((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT))
#endif
+2 -1
View File
@@ -455,7 +455,8 @@ static void r300EmitClearState(GLcontext * ctx)
e32(0);
e32(0);
e32(0);
e32(R300_PFS_NODE_LAST_NODE);
// e32(R300_PFS_NODE_LAST_NODE);
e32(R300_PFS_NODE_OUTPUT_COLOR);
R300_STATECHANGE(r300, fpi[0]);
R300_STATECHANGE(r300, fpi[1]);
+7 -1
View File
@@ -863,7 +863,9 @@ I am fairly certain that they are correct unless stated otherwise in comments.
# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12)
# define R300_PFS_NODE_TEX_END_SHIFT 17
# define R300_PFS_NODE_TEX_END_MASK (31 << 17)
# define R300_PFS_NODE_LAST_NODE (1 << 22)
/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */
# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22)
# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23)
/* TEX
// As far as I can tell, texture instructions cannot write into output
@@ -882,6 +884,7 @@ I am fairly certain that they are correct unless stated otherwise in comments.
*/
# define R300_FPITX_OPCODE_SHIFT 15
# define R300_FPITX_OP_TEX 1
# define R300_FPITX_OP_KIL 2
# define R300_FPITX_OP_TXP 3
# define R300_FPITX_OP_TXB 4
@@ -957,9 +960,11 @@ I am fairly certain that they are correct unless stated otherwise in comments.
# define R300_FPI1_SRC2C_CONST (1 << 17)
# define R300_FPI1_DSTC_SHIFT 18
# define R300_FPI1_DSTC_MASK (31 << 18)
# define R300_FPI1_DSTC_REG_MASK_SHIFT 23
# define R300_FPI1_DSTC_REG_X (1 << 23)
# define R300_FPI1_DSTC_REG_Y (1 << 24)
# define R300_FPI1_DSTC_REG_Z (1 << 25)
# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26
# define R300_FPI1_DSTC_OUTPUT_X (1 << 26)
# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27)
# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28)
@@ -978,6 +983,7 @@ I am fairly certain that they are correct unless stated otherwise in comments.
# define R300_FPI3_DSTA_MASK (31 << 18)
# define R300_FPI3_DSTA_REG (1 << 23)
# define R300_FPI3_DSTA_OUTPUT (1 << 24)
# define R300_FPI3_DSTA_DEPTH (1 << 27)
#define R300_PFS_INSTR0_0 0x48C0
# define R300_FPI0_ARGC_SRC0C_XYZ 0
+9 -10
View File
@@ -1233,8 +1233,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
vp_reg++;
} else {
/* Passing invalid data here can lock the GPU. */
fprintf(stderr, "fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
exit(-1);
WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
//exit(-1);
}
InputsRead &= ~(FRAG_BIT_TEX0<<i);
fp_reg++;
@@ -1243,8 +1243,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
if (InputsRead & FRAG_BIT_COL0) {
if (!(OutputsWritten & (hw_tcl_on ? (1<<VERT_RESULT_COL0) : _TNL_BIT_COLOR0))) {
fprintf(stderr, "fragprog wants col0, vp doesn't provide it\n");
exit(-1);
WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
//exit(-1);
}
r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
@@ -1256,8 +1256,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
if (InputsRead & FRAG_BIT_COL1) {
if (!(OutputsWritten & (hw_tcl_on ? (1<<VERT_RESULT_COL1) : _TNL_BIT_COLOR1))) {
fprintf(stderr, "fragprog wants col1, vp doesn't provide it\n");
exit(-1);
WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
//exit(-1);
}
r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11
@@ -1649,7 +1649,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
translate_vertex_shader(vp);
if (vp->translated == GL_FALSE) {
fprintf(stderr, "Failing back to sw-tcl\n");
debug_vp(ctx, &vp->mesa_program);
// debug_vp(ctx, &vp->mesa_program);
hw_tcl_on = future_hw_tcl_on = 0;
r300ResetHwState(rmesa);
@@ -1739,8 +1739,7 @@ static unsigned int r300PackFloat24(float f)
void r300SetupPixelShader(r300ContextPtr rmesa)
{
GLcontext *ctx = rmesa->radeon.glCtx;
struct r300_fragment_program *rp =
(struct r300_fragment_program *)ctx->FragmentProgram._Current;
struct r300_fragment_program *rp = ctx->FragmentProgram._Current;
int i,k;
if (!rp) /* should only happenen once, just after context is created */
@@ -1778,7 +1777,7 @@ void r300SetupPixelShader(r300ContextPtr rmesa)
| (rp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT)
| (rp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
| (rp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT)
| ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0);
| rp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */
} else {
rmesa->hw.fp.cmd[R300_FP_NODE0+(3-i)] = 0;
}