st/nine: Add support for swvp constants in shaders
swvp has relaxed limits (more nested loops, etc). In particular it enables more constants. Signed-off-by: Axel Davy <axel.davy@ens.fr>
This commit is contained in:
@@ -432,11 +432,7 @@ struct sm1_local_const
|
||||
{
|
||||
INT idx;
|
||||
struct ureg_src reg;
|
||||
union {
|
||||
boolean b;
|
||||
float f[4];
|
||||
int32_t i[4];
|
||||
} imm;
|
||||
float f[4]; /* for indirect addressing of float constants */
|
||||
};
|
||||
|
||||
struct shader_translator
|
||||
@@ -507,8 +503,10 @@ struct shader_translator
|
||||
|
||||
struct sm1_local_const *lconstf;
|
||||
unsigned num_lconstf;
|
||||
struct sm1_local_const lconsti[NINE_MAX_CONST_I];
|
||||
struct sm1_local_const lconstb[NINE_MAX_CONST_B];
|
||||
struct sm1_local_const *lconsti;
|
||||
unsigned num_lconsti;
|
||||
struct sm1_local_const *lconstb;
|
||||
unsigned num_lconstb;
|
||||
|
||||
boolean indirect_const_access;
|
||||
boolean failure;
|
||||
@@ -542,6 +540,7 @@ static boolean
|
||||
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
|
||||
{
|
||||
INT i;
|
||||
|
||||
if (index < 0 || index >= tx->num_constf_allowed) {
|
||||
tx->failure = TRUE;
|
||||
return FALSE;
|
||||
@@ -557,24 +556,36 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
|
||||
static boolean
|
||||
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (index < 0 || index >= tx->num_consti_allowed) {
|
||||
tx->failure = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
if (tx->lconsti[index].idx == index)
|
||||
*src = tx->lconsti[index].reg;
|
||||
return tx->lconsti[index].idx == index;
|
||||
for (i = 0; i < tx->num_lconsti; ++i) {
|
||||
if (tx->lconsti[i].idx == index) {
|
||||
*src = tx->lconsti[i].reg;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
static boolean
|
||||
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (index < 0 || index >= tx->num_constb_allowed) {
|
||||
tx->failure = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
if (tx->lconstb[index].idx == index)
|
||||
*src = tx->lconstb[index].reg;
|
||||
return tx->lconstb[index].idx == index;
|
||||
for (i = 0; i < tx->num_lconstb; ++i) {
|
||||
if (tx->lconstb[i].idx == index) {
|
||||
*src = tx->lconstb[i].reg;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -599,23 +610,55 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
|
||||
tx->lconstf[n].idx = index;
|
||||
tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
|
||||
|
||||
memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f));
|
||||
memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
|
||||
}
|
||||
static void
|
||||
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
|
||||
{
|
||||
unsigned n;
|
||||
|
||||
FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
|
||||
tx->lconsti[index].idx = index;
|
||||
tx->lconsti[index].reg = tx->native_integers ?
|
||||
|
||||
for (n = 0; n < tx->num_lconsti; ++n)
|
||||
if (tx->lconsti[n].idx == index)
|
||||
break;
|
||||
if (n == tx->num_lconsti) {
|
||||
if ((n % 8) == 0) {
|
||||
tx->lconsti = REALLOC(tx->lconsti,
|
||||
(n + 0) * sizeof(tx->lconsti[0]),
|
||||
(n + 8) * sizeof(tx->lconsti[0]));
|
||||
assert(tx->lconsti);
|
||||
}
|
||||
tx->num_lconsti++;
|
||||
}
|
||||
|
||||
tx->lconsti[n].idx = index;
|
||||
tx->lconsti[n].reg = tx->native_integers ?
|
||||
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
|
||||
ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
|
||||
}
|
||||
static void
|
||||
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
|
||||
{
|
||||
unsigned n;
|
||||
|
||||
FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
|
||||
tx->lconstb[index].idx = index;
|
||||
tx->lconstb[index].reg = tx->native_integers ?
|
||||
|
||||
for (n = 0; n < tx->num_lconstb; ++n)
|
||||
if (tx->lconstb[n].idx == index)
|
||||
break;
|
||||
if (n == tx->num_lconstb) {
|
||||
if ((n % 8) == 0) {
|
||||
tx->lconstb = REALLOC(tx->lconstb,
|
||||
(n + 0) * sizeof(tx->lconstb[0]),
|
||||
(n + 8) * sizeof(tx->lconstb[0]));
|
||||
assert(tx->lconstb);
|
||||
}
|
||||
tx->num_lconstb++;
|
||||
}
|
||||
|
||||
tx->lconstb[n].idx = index;
|
||||
tx->lconstb[n].reg = tx->native_integers ?
|
||||
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
|
||||
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
|
||||
}
|
||||
@@ -942,7 +985,26 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
|
||||
if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
|
||||
if (!param->rel)
|
||||
nine_info_mark_const_f_used(tx->info, param->idx);
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
|
||||
/* vswp constant handling: we use two buffers
|
||||
* to fit all the float constants. The special handling
|
||||
* doesn't need to be elsewhere, because all the instructions
|
||||
* accessing the constants directly are VS1, and swvp
|
||||
* is VS >= 2 */
|
||||
if (IS_VS && tx->info->swvp_on) {
|
||||
if (!param->rel) {
|
||||
if (param->idx < 4096) {
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
|
||||
src = ureg_src_dimension(src, 0);
|
||||
} else {
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
|
||||
src = ureg_src_dimension(src, 1);
|
||||
}
|
||||
} else {
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
|
||||
src = ureg_src_dimension(src, 0);
|
||||
}
|
||||
} else
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
|
||||
}
|
||||
if (!IS_VS && tx->version.major < 2) {
|
||||
/* ps 1.X clamps constants */
|
||||
@@ -964,8 +1026,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
|
||||
assert(!param->rel);
|
||||
if (!tx_lconsti(tx, &src, param->idx)) {
|
||||
nine_info_mark_const_i_used(tx->info, param->idx);
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT,
|
||||
tx->info->const_i_base + param->idx);
|
||||
if (IS_VS && tx->info->swvp_on) {
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
|
||||
src = ureg_src_dimension(src, 2);
|
||||
} else
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT,
|
||||
tx->info->const_i_base + param->idx);
|
||||
}
|
||||
break;
|
||||
case D3DSPR_CONSTBOOL:
|
||||
@@ -974,8 +1040,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
|
||||
char r = param->idx / 4;
|
||||
char s = param->idx & 3;
|
||||
nine_info_mark_const_b_used(tx->info, param->idx);
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT,
|
||||
tx->info->const_b_base + r);
|
||||
if (IS_VS && tx->info->swvp_on) {
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT, r);
|
||||
src = ureg_src_dimension(src, 3);
|
||||
} else
|
||||
src = ureg_src_register(TGSI_FILE_CONSTANT,
|
||||
tx->info->const_b_base + r);
|
||||
src = ureg_swizzle(src, s, s, s, s);
|
||||
}
|
||||
break;
|
||||
@@ -3353,8 +3423,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
|
||||
struct shader_translator *tx;
|
||||
HRESULT hr = D3D_OK;
|
||||
const unsigned processor = info->type;
|
||||
unsigned s, slot_max;
|
||||
unsigned max_const_f;
|
||||
|
||||
user_assert(processor != ~0, D3DERR_INVALIDCALL);
|
||||
|
||||
@@ -3411,6 +3479,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
|
||||
tx->num_constb_allowed = NINE_MAX_CONST_B;
|
||||
}
|
||||
|
||||
if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
|
||||
tx->num_constf_allowed = 8192;
|
||||
tx->num_consti_allowed = 2048;
|
||||
tx->num_constb_allowed = 2048;
|
||||
}
|
||||
|
||||
/* VS must always write position. Declare it here to make it the 1st output.
|
||||
* (Some drivers like nv50 are buggy and rely on that.)
|
||||
*/
|
||||
@@ -3485,7 +3559,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
|
||||
k = i;
|
||||
}
|
||||
indices[n] = tx->lconstf[k].idx;
|
||||
memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float));
|
||||
memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
|
||||
tx->lconstf[k].idx = INT_MAX;
|
||||
}
|
||||
|
||||
@@ -3520,25 +3594,35 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
|
||||
|
||||
/* r500 */
|
||||
if (info->const_float_slots > device->max_vs_const_f &&
|
||||
(info->const_int_slots || info->const_bool_slots))
|
||||
(info->const_int_slots || info->const_bool_slots) &&
|
||||
(!IS_VS || !info->swvp_on))
|
||||
ERR("Overlapping constant slots. The shader is likely to be buggy\n");
|
||||
|
||||
|
||||
if (tx->indirect_const_access) /* vs only */
|
||||
info->const_float_slots = device->max_vs_const_f;
|
||||
|
||||
max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
|
||||
slot_max = info->const_bool_slots > 0 ?
|
||||
max_const_f + NINE_MAX_CONST_I
|
||||
+ DIV_ROUND_UP(info->const_bool_slots, 4) :
|
||||
info->const_int_slots > 0 ?
|
||||
max_const_f + info->const_int_slots :
|
||||
info->const_float_slots;
|
||||
if (!IS_VS || !info->swvp_on) {
|
||||
unsigned s, slot_max;
|
||||
unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
|
||||
|
||||
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
|
||||
slot_max = info->const_bool_slots > 0 ?
|
||||
max_const_f + NINE_MAX_CONST_I
|
||||
+ DIV_ROUND_UP(info->const_bool_slots, 4) :
|
||||
info->const_int_slots > 0 ?
|
||||
max_const_f + info->const_int_slots :
|
||||
info->const_float_slots;
|
||||
|
||||
for (s = 0; s < slot_max; s++)
|
||||
ureg_DECL_constant(tx->ureg, s);
|
||||
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
|
||||
|
||||
for (s = 0; s < slot_max; s++)
|
||||
ureg_DECL_constant(tx->ureg, s);
|
||||
} else {
|
||||
ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
|
||||
ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
|
||||
ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
|
||||
ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
|
||||
}
|
||||
|
||||
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
|
||||
unsigned count;
|
||||
|
||||
@@ -76,6 +76,8 @@ struct nine_shader_info
|
||||
|
||||
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
|
||||
uint8_t bumpenvmat_needed;
|
||||
|
||||
boolean swvp_on;
|
||||
};
|
||||
|
||||
static inline void
|
||||
|
||||
@@ -63,6 +63,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
|
||||
info.fog_enable = 0;
|
||||
info.point_size_min = 0;
|
||||
info.point_size_max = 0;
|
||||
info.swvp_on = false;
|
||||
|
||||
hr = nine_translate_shader(device, &info);
|
||||
if (FAILED(hr))
|
||||
@@ -167,6 +168,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
|
||||
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
|
||||
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
|
||||
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
|
||||
info.swvp_on = false;
|
||||
|
||||
hr = nine_translate_shader(This->base.device, &info);
|
||||
if (FAILED(hr))
|
||||
|
||||
Reference in New Issue
Block a user