Missing patch from Ben Skeggs:
Lots of changes, and fixes for some badness on my part. 1. Disposable data used during fragment program (fp) compile is now per-context rather than per-program, to save memory. 2. Track usage of INPUT/TEMP registers from the Mesa program, and free them when no longer required so the hw temps can be re-used. 3. Changed LAST_NODE to OUTPUT_COLOR (see r300_reg.h). 4. Implemented the remaining ARB_f_p instructions, with the exception of the trig/LIT opcodes. 5. Treat ZERO/ONE swizzles the same way as other native swizzles. 6. emit_arith changes, basically a complete rewrite. Should produce cleaner instructions, but no real functional changes. The internal reg -> hw reg routines are now shared with emit_tex. A bit messy still.
This commit is contained in:
@@ -85,7 +85,6 @@ typedef struct r300_context *r300ContextPtr;
|
||||
|
||||
typedef GLuint uint32_t;
|
||||
typedef GLubyte uint8_t;
|
||||
struct r300_fragment_program;
|
||||
|
||||
/* We should probably change types within vertex_shader
|
||||
and pixel_shader structure later on */
|
||||
@@ -613,12 +612,46 @@ struct r300_vertex_program {
|
||||
#define PFS_MAX_TEX_INDIRECT 4
|
||||
#define PFS_NUM_TEMP_REGS 32
|
||||
#define PFS_NUM_CONST_REGS 32
|
||||
|
||||
/* Tracking data for Mesa registers */
|
||||
struct reg_acc {
|
||||
int reg; /* Assigned hw temp */
|
||||
unsigned int refcount; /* Number of uses by mesa program */
|
||||
};
|
||||
|
||||
struct r300_pfs_compile_state {
|
||||
int v_pos, s_pos; /* highest ALU slots used */
|
||||
|
||||
/* Track some information gathered during opcode
|
||||
* construction.
|
||||
*
|
||||
* NOTE: Data is only set by the code, and isn't used yet.
|
||||
*/
|
||||
struct {
|
||||
int vsrc[3];
|
||||
int ssrc[3];
|
||||
int umask;
|
||||
} slot[PFS_MAX_ALU_INST];
|
||||
|
||||
/* Used to map Mesa's inputs/temps onto hardware temps */
|
||||
int temp_in_use;
|
||||
struct reg_acc temps[PFS_NUM_TEMP_REGS];
|
||||
struct reg_acc inputs[32]; /* don't actually need 32... */
|
||||
|
||||
/* Track usage of hardware temps, for register allocation,
|
||||
* indirection detection, etc. */
|
||||
int hwreg_in_use;
|
||||
GLuint used_in_node;
|
||||
GLuint dest_in_node;
|
||||
};
|
||||
|
||||
struct r300_fragment_program {
|
||||
struct fragment_program mesa_program;
|
||||
|
||||
GLcontext *ctx;
|
||||
GLboolean translated;
|
||||
GLboolean error;
|
||||
struct r300_pfs_compile_state *cs;
|
||||
|
||||
struct {
|
||||
int length;
|
||||
@@ -633,14 +666,13 @@ struct r300_fragment_program {
|
||||
GLuint inst3;
|
||||
} inst[PFS_MAX_ALU_INST];
|
||||
} alu;
|
||||
int v_pos;
|
||||
int s_pos;
|
||||
|
||||
struct {
|
||||
int tex_offset;
|
||||
int tex_end;
|
||||
int alu_offset;
|
||||
int alu_end;
|
||||
int flags;
|
||||
} node[4];
|
||||
int cur_node;
|
||||
int first_node_has_tex;
|
||||
@@ -661,14 +693,7 @@ struct r300_fragment_program {
|
||||
} param[PFS_NUM_CONST_REGS];
|
||||
int param_nr;
|
||||
GLboolean params_uptodate;
|
||||
|
||||
GLuint temps[PFS_NUM_TEMP_REGS];
|
||||
int temp_in_use;
|
||||
GLuint used_in_node;
|
||||
GLuint dest_in_node;
|
||||
GLuint inputs[32]; /* don't actually need 32... */
|
||||
|
||||
int hwreg_in_use;
|
||||
int max_temp_idx;
|
||||
};
|
||||
|
||||
@@ -794,6 +819,8 @@ struct r300_state {
|
||||
struct r300_vertex_shader_state vertex_shader;
|
||||
#if USE_ARB_F_P == 0
|
||||
struct r300_pixel_shader_state pixel_shader;
|
||||
#else
|
||||
struct r300_pfs_compile_state pfs_compile;
|
||||
#endif
|
||||
struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
|
||||
int aos_count;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,6 +21,8 @@ typedef struct _pfs_reg_t {
|
||||
GLuint v_swz:5;
|
||||
GLuint s_swz:5;
|
||||
GLuint negate:1; //XXX: we need to handle negate individually
|
||||
GLuint absolute:1;
|
||||
GLboolean no_use:1;
|
||||
GLboolean valid:1;
|
||||
} pfs_reg_t;
|
||||
|
||||
@@ -38,12 +40,37 @@ typedef struct _pfs_reg_t {
|
||||
#define PFS_OP_RSQ 10
|
||||
#define PFS_OP_REPL_ALPHA 11
|
||||
#define MAX_PFS_OP 11
|
||||
#define OP(n) PFS_OP_##n
|
||||
|
||||
#define PFS_FLAG_SAT (1 << 0)
|
||||
#define PFS_FLAG_ABS (1 << 1)
|
||||
|
||||
extern void translate_fragment_shader(struct r300_fragment_program *rp);
|
||||
#define ARG_NEG (1 << 5)
|
||||
#define ARG_ABS (1 << 6)
|
||||
#define ARG_MASK (127 << 0)
|
||||
#define ARG_STRIDE 7
|
||||
#define SRC_CONST (1 << 5)
|
||||
#define SRC_MASK (63 << 0)
|
||||
#define SRC_STRIDE 6
|
||||
|
||||
#endif /* __R300_FRAGPROG_H_ */
|
||||
#define NOP_INST0 ( \
|
||||
(R300_FPI0_OUTC_MAD) | \
|
||||
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \
|
||||
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \
|
||||
(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT))
|
||||
#define NOP_INST1 ( \
|
||||
((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \
|
||||
((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \
|
||||
((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT))
|
||||
#define NOP_INST2 ( \
|
||||
(R300_FPI2_OUTA_MAD) | \
|
||||
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \
|
||||
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \
|
||||
(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT))
|
||||
#define NOP_INST3 ( \
|
||||
((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \
|
||||
((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \
|
||||
((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT))
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -455,7 +455,8 @@ static void r300EmitClearState(GLcontext * ctx)
|
||||
e32(0);
|
||||
e32(0);
|
||||
e32(0);
|
||||
e32(R300_PFS_NODE_LAST_NODE);
|
||||
// e32(R300_PFS_NODE_LAST_NODE);
|
||||
e32(R300_PFS_NODE_OUTPUT_COLOR);
|
||||
|
||||
R300_STATECHANGE(r300, fpi[0]);
|
||||
R300_STATECHANGE(r300, fpi[1]);
|
||||
|
||||
@@ -863,7 +863,9 @@ I am fairly certain that they are correct unless stated otherwise in comments.
|
||||
# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12)
|
||||
# define R300_PFS_NODE_TEX_END_SHIFT 17
|
||||
# define R300_PFS_NODE_TEX_END_MASK (31 << 17)
|
||||
# define R300_PFS_NODE_LAST_NODE (1 << 22)
|
||||
/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */
|
||||
# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22)
|
||||
# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23)
|
||||
|
||||
/* TEX
|
||||
// As far as I can tell, texture instructions cannot write into output
|
||||
@@ -882,6 +884,7 @@ I am fairly certain that they are correct unless stated otherwise in comments.
|
||||
*/
|
||||
# define R300_FPITX_OPCODE_SHIFT 15
|
||||
# define R300_FPITX_OP_TEX 1
|
||||
# define R300_FPITX_OP_KIL 2
|
||||
# define R300_FPITX_OP_TXP 3
|
||||
# define R300_FPITX_OP_TXB 4
|
||||
|
||||
@@ -957,9 +960,11 @@ I am fairly certain that they are correct unless stated otherwise in comments.
|
||||
# define R300_FPI1_SRC2C_CONST (1 << 17)
|
||||
# define R300_FPI1_DSTC_SHIFT 18
|
||||
# define R300_FPI1_DSTC_MASK (31 << 18)
|
||||
# define R300_FPI1_DSTC_REG_MASK_SHIFT 23
|
||||
# define R300_FPI1_DSTC_REG_X (1 << 23)
|
||||
# define R300_FPI1_DSTC_REG_Y (1 << 24)
|
||||
# define R300_FPI1_DSTC_REG_Z (1 << 25)
|
||||
# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26
|
||||
# define R300_FPI1_DSTC_OUTPUT_X (1 << 26)
|
||||
# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27)
|
||||
# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28)
|
||||
@@ -978,6 +983,7 @@ I am fairly certain that they are correct unless stated otherwise in comments.
|
||||
# define R300_FPI3_DSTA_MASK (31 << 18)
|
||||
# define R300_FPI3_DSTA_REG (1 << 23)
|
||||
# define R300_FPI3_DSTA_OUTPUT (1 << 24)
|
||||
# define R300_FPI3_DSTA_DEPTH (1 << 27)
|
||||
|
||||
#define R300_PFS_INSTR0_0 0x48C0
|
||||
# define R300_FPI0_ARGC_SRC0C_XYZ 0
|
||||
|
||||
@@ -1233,8 +1233,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
|
||||
vp_reg++;
|
||||
} else {
|
||||
/* Passing invalid data here can lock the GPU. */
|
||||
fprintf(stderr, "fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
|
||||
exit(-1);
|
||||
WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
|
||||
//exit(-1);
|
||||
}
|
||||
InputsRead &= ~(FRAG_BIT_TEX0<<i);
|
||||
fp_reg++;
|
||||
@@ -1243,8 +1243,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
|
||||
|
||||
if (InputsRead & FRAG_BIT_COL0) {
|
||||
if (!(OutputsWritten & (hw_tcl_on ? (1<<VERT_RESULT_COL0) : _TNL_BIT_COLOR0))) {
|
||||
fprintf(stderr, "fragprog wants col0, vp doesn't provide it\n");
|
||||
exit(-1);
|
||||
WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
|
||||
//exit(-1);
|
||||
}
|
||||
|
||||
r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
|
||||
@@ -1256,8 +1256,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
|
||||
|
||||
if (InputsRead & FRAG_BIT_COL1) {
|
||||
if (!(OutputsWritten & (hw_tcl_on ? (1<<VERT_RESULT_COL1) : _TNL_BIT_COLOR1))) {
|
||||
fprintf(stderr, "fragprog wants col1, vp doesn't provide it\n");
|
||||
exit(-1);
|
||||
WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
|
||||
//exit(-1);
|
||||
}
|
||||
|
||||
r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11
|
||||
@@ -1649,7 +1649,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
|
||||
translate_vertex_shader(vp);
|
||||
if (vp->translated == GL_FALSE) {
|
||||
fprintf(stderr, "Failing back to sw-tcl\n");
|
||||
debug_vp(ctx, &vp->mesa_program);
|
||||
// debug_vp(ctx, &vp->mesa_program);
|
||||
hw_tcl_on = future_hw_tcl_on = 0;
|
||||
r300ResetHwState(rmesa);
|
||||
|
||||
@@ -1739,8 +1739,7 @@ static unsigned int r300PackFloat24(float f)
|
||||
void r300SetupPixelShader(r300ContextPtr rmesa)
|
||||
{
|
||||
GLcontext *ctx = rmesa->radeon.glCtx;
|
||||
struct r300_fragment_program *rp =
|
||||
(struct r300_fragment_program *)ctx->FragmentProgram._Current;
|
||||
struct r300_fragment_program *rp = ctx->FragmentProgram._Current;
|
||||
int i,k;
|
||||
|
||||
if (!rp) /* should only happen once, just after context is created */
|
||||
@@ -1778,7 +1777,7 @@ void r300SetupPixelShader(r300ContextPtr rmesa)
|
||||
| (rp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT)
|
||||
| (rp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
|
||||
| (rp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT)
|
||||
| ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0);
|
||||
| rp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */
|
||||
} else {
|
||||
rmesa->hw.fp.cmd[R300_FP_NODE0+(3-i)] = 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user