Merge remote-tracking branch 'public/master' into vulkan

This commit is contained in:
Jason Ekstrand
2016-04-01 14:59:38 -07:00
253 changed files with 8673 additions and 3663 deletions
+2 -2
View File
@@ -179,10 +179,10 @@ NIR_FILES = \
nir/nir_gather_info.c \
nir/nir_gs_count_vertices.c \
nir/nir_inline_functions.c \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
nir/nir_instr_set.c \
nir/nir_instr_set.h \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
nir/nir_liveness.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
+33 -2
View File
@@ -214,6 +214,7 @@ public:
subexpressions[2] = NULL;
primary_expression.identifier = identifier;
this->non_lvalue_description = NULL;
this->is_lhs = false;
}
static const char *operator_string(enum ast_operators op);
@@ -263,6 +264,11 @@ public:
* This pointer may be \c NULL.
*/
const char *non_lvalue_description;
void set_is_lhs(bool new_value);
private:
bool is_lhs;
};
class ast_expression_bin : public ast_expression {
@@ -556,6 +562,15 @@ struct ast_type_qualifier {
unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */
/** \} */
/** \name Layout qualifiers for GL_ARB_enhanced_layouts */
/** \{ */
unsigned explicit_xfb_offset:1; /**< xfb_offset value assigned explicitly by shader code */
unsigned xfb_buffer:1; /**< Has xfb_buffer value assigned */
unsigned explicit_xfb_buffer:1; /**< xfb_buffer value assigned explicitly by shader code */
unsigned xfb_stride:1; /**< Is xfb_stride value yet to be merged with global values */
unsigned explicit_xfb_stride:1; /**< xfb_stride value assigned explicitly by shader code */
/** \} */
/** \name Layout qualifiers for GL_ARB_tessellation_shader */
/** \{ */
/* tess eval input layout */
@@ -612,6 +627,15 @@ struct ast_type_qualifier {
/** Stream in GLSL 1.50 geometry shaders. */
ast_expression *stream;
/** xfb_buffer specified via the GL_ARB_enhanced_layouts keyword. */
ast_expression *xfb_buffer;
/** xfb_stride specified via the GL_ARB_enhanced_layouts keyword. */
ast_expression *xfb_stride;
/** global xfb_stride values for each buffer */
ast_layout_expression *out_xfb_stride[MAX_FEEDBACK_BUFFERS];
/**
* Input or output primitive type in GLSL 1.50 geometry shaders
* and tessellation shaders.
@@ -627,8 +651,9 @@ struct ast_type_qualifier {
ast_expression *binding;
/**
* Offset specified via GL_ARB_shader_atomic_counter's "offset"
* keyword.
* Offset specified via GL_ARB_shader_atomic_counter's or
* GL_ARB_enhanced_layouts "offset" keyword, or by GL_ARB_enhanced_layouts
* "xfb_offset" keyword.
*
* \note
* This field is only valid if \c explicit_offset is set.
@@ -1199,4 +1224,10 @@ extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
ast_interface_block *const block,
const struct ast_type_qualifier &q);
extern bool
process_qualifier_constant(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
const char *qual_indentifier,
ast_expression *const_expression,
unsigned *value);
#endif /* AST_H */
+4
View File
@@ -1727,6 +1727,10 @@ ast_function_expression::handle_method(exec_list *instructions,
const char *method;
method = field->primary_expression.identifier;
/* Mark the object expression as an lhs so that no "uninitialized variable"
* warning is raised when calling array.length().
*/
field->subexpressions[0]->set_is_lhs(true);
op = field->subexpressions[0]->hir(instructions, state);
if (strcmp(method, "length") == 0) {
if (!this->expressions.is_empty()) {
+262 -44
View File
@@ -54,6 +54,7 @@
#include "ast.h"
#include "compiler/glsl_types.h"
#include "program/hash_table.h"
#include "main/macros.h"
#include "main/shaderobj.h"
#include "ir.h"
#include "ir_builder.h"
@@ -819,7 +820,7 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
* if the expression indicating the vertex number is not the identifier
* `gl_InvocationID`.
*/
if (state->stage == MESA_SHADER_TESS_CTRL) {
if (state->stage == MESA_SHADER_TESS_CTRL && !lhs->type->is_error()) {
ir_variable *var = lhs->variable_referenced();
if (var->data.mode == ir_var_shader_out && !var->data.patch) {
ir_rvalue *index = find_innermost_array_index(lhs);
@@ -1248,6 +1249,24 @@ ast_expression::hir_no_rvalue(exec_list *instructions,
do_hir(instructions, state, false);
}
void
ast_expression::set_is_lhs(bool new_value)
{
/* is_lhs is tracked only to print "variable used uninitialized" warnings,
* if we lack a identifier we can just skip it.
*/
if (this->primary_expression.identifier == NULL)
return;
this->is_lhs = new_value;
/* We need to go through the subexpressions tree to cover cases like
* ast_field_selection
*/
if (this->subexpressions[0] != NULL)
this->subexpressions[0]->set_is_lhs(new_value);
}
ir_rvalue *
ast_expression::do_hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state,
@@ -1323,6 +1342,7 @@ ast_expression::do_hir(exec_list *instructions,
break;
case ast_assign: {
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
@@ -1592,6 +1612,7 @@ ast_expression::do_hir(exec_list *instructions,
case ast_div_assign:
case ast_add_assign:
case ast_sub_assign: {
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
@@ -1618,6 +1639,7 @@ ast_expression::do_hir(exec_list *instructions,
}
case ast_mod_assign: {
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
@@ -1640,6 +1662,7 @@ ast_expression::do_hir(exec_list *instructions,
case ast_ls_assign:
case ast_rs_assign: {
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
@@ -1658,6 +1681,7 @@ ast_expression::do_hir(exec_list *instructions,
case ast_and_assign:
case ast_xor_assign:
case ast_or_assign: {
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
@@ -1839,6 +1863,11 @@ ast_expression::do_hir(exec_list *instructions,
case ast_array_index: {
YYLTYPE index_loc = subexpressions[1]->get_location();
/* Detecting whether an array is being used uninitialized is beyond what
* we can get from ir_variable.data.assigned. Setting is_lhs to true
* suppresses the uninitialized warning when an array is indexed.
*/
subexpressions[0]->set_is_lhs(true);
op[0] = subexpressions[0]->hir(instructions, state);
op[1] = subexpressions[1]->hir(instructions, state);
@@ -1873,6 +1902,14 @@ ast_expression::do_hir(exec_list *instructions,
if (var != NULL) {
var->data.used = true;
result = new(ctx) ir_dereference_variable(var);
if ((var->data.mode == ir_var_auto || var->data.mode == ir_var_shader_out)
&& !this->is_lhs
&& result->variable_referenced()->data.assigned != true
&& !is_gl_identifier(var->name)) {
_mesa_glsl_warning(&loc, state, "`%s' used uninitialized",
this->primary_expression.identifier);
}
} else {
_mesa_glsl_error(& loc, state, "`%s' undeclared",
this->primary_expression.identifier);
@@ -2318,11 +2355,11 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
return names[type_idx];
}
case GLSL_SAMPLER_DIM_BUF: {
assert(type->base_type == GLSL_TYPE_SAMPLER);
static const char *const names[4] = {
"samplerBuffer", NULL, NULL, NULL
static const char *const names[8] = {
"samplerBuffer", NULL, NULL, NULL,
"imageBuffer", NULL, NULL, NULL
};
return names[type_idx];
return names[offset + type_idx];
}
case GLSL_SAMPLER_DIM_EXTERNAL: {
assert(type->base_type == GLSL_TYPE_SAMPLER);
@@ -2380,11 +2417,11 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
return names[type_idx];
}
case GLSL_SAMPLER_DIM_BUF: {
assert(type->base_type == GLSL_TYPE_SAMPLER);
static const char *const names[4] = {
"isamplerBuffer", NULL, NULL, NULL
static const char *const names[8] = {
"isamplerBuffer", NULL, NULL, NULL,
"iimageBuffer", NULL, NULL, NULL
};
return names[type_idx];
return names[offset + type_idx];
}
default:
unreachable("Unsupported isampler/iimage dimensionality");
@@ -2435,11 +2472,11 @@ get_type_name_for_precision_qualifier(const glsl_type *type)
return names[type_idx];
}
case GLSL_SAMPLER_DIM_BUF: {
assert(type->base_type == GLSL_TYPE_SAMPLER);
static const char *const names[4] = {
"usamplerBuffer", NULL, NULL, NULL
static const char *const names[8] = {
"usamplerBuffer", NULL, NULL, NULL,
"uimageBuffer", NULL, NULL, NULL
};
return names[type_idx];
return names[offset + type_idx];
}
default:
unreachable("Unsupported usampler/uimage dimensionality");
@@ -2550,43 +2587,79 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
}
static bool
process_qualifier_constant(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
const char *qual_indentifier,
ast_expression *const_expression,
unsigned *value)
{
exec_list dummy_instructions;
if (const_expression == NULL) {
*value = 0;
return true;
}
ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
ir_constant *const const_int = ir->constant_expression_value();
if (const_int == NULL || !const_int->type->is_integer()) {
_mesa_glsl_error(loc, state, "%s must be an integral constant "
"expression", qual_indentifier);
validate_xfb_buffer_qualifier(YYLTYPE *loc,
struct _mesa_glsl_parse_state *state,
unsigned xfb_buffer) {
if (xfb_buffer >= state->Const.MaxTransformFeedbackBuffers) {
_mesa_glsl_error(loc, state,
"invalid xfb_buffer specified %d is larger than "
"MAX_TRANSFORM_FEEDBACK_BUFFERS - 1 (%d).",
xfb_buffer,
state->Const.MaxTransformFeedbackBuffers - 1);
return false;
}
if (const_int->value.i[0] < 0) {
_mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
qual_indentifier, const_int->value.u[0]);
return true;
}
/* From the ARB_enhanced_layouts spec:
*
* "Variables and block members qualified with *xfb_offset* can be
* scalars, vectors, matrices, structures, and (sized) arrays of these.
* The offset must be a multiple of the size of the first component of
* the first qualified variable or block member, or a compile-time error
* results. Further, if applied to an aggregate containing a double,
* the offset must also be a multiple of 8, and the space taken in the
* buffer will be a multiple of 8.
*/
static bool
validate_xfb_offset_qualifier(YYLTYPE *loc,
struct _mesa_glsl_parse_state *state,
int xfb_offset, const glsl_type *type,
unsigned component_size) {
const glsl_type *t_without_array = type->without_array();
if (xfb_offset != -1 && type->is_unsized_array()) {
_mesa_glsl_error(loc, state,
"xfb_offset can't be used with unsized arrays.");
return false;
}
/* If the location is const (and we've verified that
* it is) then no instructions should have been emitted
* when we converted it to HIR. If they were emitted,
* then either the location isn't const after all, or
* we are emitting unnecessary instructions.
/* Make sure nested structs don't contain unsized arrays, and validate
* any xfb_offsets on interface members.
*/
assert(dummy_instructions.is_empty());
if (t_without_array->is_record() || t_without_array->is_interface())
for (unsigned int i = 0; i < t_without_array->length; i++) {
const glsl_type *member_t = t_without_array->fields.structure[i].type;
/* When the interface block doesn't have an xfb_offset qualifier then
* we apply the component size rules at the member level.
*/
if (xfb_offset == -1)
component_size = member_t->contains_double() ? 8 : 4;
int xfb_offset = t_without_array->fields.structure[i].offset;
validate_xfb_offset_qualifier(loc, state, xfb_offset, member_t,
component_size);
}
/* Nested structs or interface block without offset may not have had an
* offset applied yet so return.
*/
if (xfb_offset == -1) {
return true;
}
if (xfb_offset % component_size) {
_mesa_glsl_error(loc, state,
"invalid qualifier xfb_offset=%d must be a multiple "
"of the first component size of the first qualified "
"variable or block member. Or double if an aggregate "
"that contains a double (%d).",
xfb_offset, component_size);
return false;
}
*value = const_int->value.u[0];
return true;
}
@@ -3151,6 +3224,39 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
}
}
if (qual->flags.q.out && qual->flags.q.xfb_buffer) {
unsigned qual_xfb_buffer;
if (process_qualifier_constant(state, loc, "xfb_buffer",
qual->xfb_buffer, &qual_xfb_buffer) &&
validate_xfb_buffer_qualifier(loc, state, qual_xfb_buffer)) {
var->data.xfb_buffer = qual_xfb_buffer;
if (qual->flags.q.explicit_xfb_buffer)
var->data.explicit_xfb_buffer = true;
}
}
if (qual->flags.q.explicit_xfb_offset) {
unsigned qual_xfb_offset;
unsigned component_size = var->type->contains_double() ? 8 : 4;
if (process_qualifier_constant(state, loc, "xfb_offset",
qual->offset, &qual_xfb_offset) &&
validate_xfb_offset_qualifier(loc, state, (int) qual_xfb_offset,
var->type, component_size)) {
var->data.offset = qual_xfb_offset;
var->data.explicit_xfb_offset = true;
}
}
if (qual->flags.q.explicit_xfb_stride) {
unsigned qual_xfb_stride;
if (process_qualifier_constant(state, loc, "xfb_stride",
qual->xfb_stride, &qual_xfb_stride)) {
var->data.xfb_stride = qual_xfb_stride;
var->data.explicit_xfb_stride = true;
}
}
if (var->type->contains_atomic()) {
if (var->data.mode == ir_var_uniform) {
if (var->data.explicit_binding) {
@@ -5746,6 +5852,11 @@ ast_switch_statement::test_to_hir(exec_list *instructions,
{
void *ctx = state;
/* Set is_lhs to true to avoid a duplicate "use of uninitialized variable"
* warning on the switch test expression. The first one will already have
* been raised when evaluating test_expression in ast_switch_statement::hir.
*/
test_expression->set_is_lhs(true);
/* Cache value of test expression. */
ir_rvalue *const test_val =
test_expression->hir(instructions,
@@ -6258,6 +6369,8 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
ir_variable_mode var_mode,
ast_type_qualifier *layout,
unsigned block_stream,
unsigned block_xfb_buffer,
unsigned block_xfb_offset,
unsigned expl_location,
unsigned expl_align)
{
@@ -6413,6 +6526,35 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
}
}
int xfb_buffer;
unsigned explicit_xfb_buffer = 0;
if (qual->flags.q.explicit_xfb_buffer) {
unsigned qual_xfb_buffer;
if (process_qualifier_constant(state, &loc, "xfb_buffer",
qual->xfb_buffer, &qual_xfb_buffer)) {
explicit_xfb_buffer = 1;
if (qual_xfb_buffer != block_xfb_buffer)
_mesa_glsl_error(&loc, state, "xfb_buffer layout qualifier on "
"interface block member does not match "
"the interface block (%u vs %u)",
qual_xfb_buffer, block_xfb_buffer);
}
xfb_buffer = (int) qual_xfb_buffer;
} else {
if (layout)
explicit_xfb_buffer = layout->flags.q.xfb_buffer;
xfb_buffer = (int) block_xfb_buffer;
}
int xfb_stride = -1;
if (qual->flags.q.explicit_xfb_stride) {
unsigned qual_xfb_stride;
if (process_qualifier_constant(state, &loc, "xfb_stride",
qual->xfb_stride, &qual_xfb_stride)) {
xfb_stride = (int) qual_xfb_stride;
}
}
if (qual->flags.q.uniform && qual->has_interpolation()) {
_mesa_glsl_error(&loc, state,
"interpolation qualifiers cannot be used "
@@ -6458,6 +6600,10 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
fields[i].sample = qual->flags.q.sample ? 1 : 0;
fields[i].patch = qual->flags.q.patch ? 1 : 0;
fields[i].precision = qual->precision;
fields[i].offset = -1;
fields[i].explicit_xfb_buffer = explicit_xfb_buffer;
fields[i].xfb_buffer = xfb_buffer;
fields[i].xfb_stride = xfb_stride;
if (qual->flags.q.explicit_location) {
unsigned qual_location;
@@ -6520,8 +6666,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
"with std430 and std140 layouts");
}
}
} else {
fields[i].offset = -1;
}
if (qual->flags.q.explicit_align || expl_align != 0) {
@@ -6554,6 +6698,32 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
next_offset = glsl_align(next_offset + size, align);
}
/* From the ARB_enhanced_layouts spec:
*
* "The given offset applies to the first component of the first
* member of the qualified entity. Then, within the qualified
* entity, subsequent components are each assigned, in order, to
* the next available offset aligned to a multiple of that
* component's size. Aggregate types are flattened down to the
* component level to get this sequence of components."
*/
if (qual->flags.q.explicit_xfb_offset) {
unsigned xfb_offset;
if (process_qualifier_constant(state, &loc, "xfb_offset",
qual->offset, &xfb_offset)) {
fields[i].offset = xfb_offset;
block_xfb_offset = fields[i].offset +
MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
}
} else {
if (layout && layout->flags.q.explicit_xfb_offset) {
unsigned align = field_type->is_double() ? 8 : 4;
fields[i].offset = glsl_align(block_xfb_offset, align);
block_xfb_offset +=
MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
}
}
/* Propagate row- / column-major information down the fields of the
* structure or interface block. Structures need this data because
* the structure may contain a structure that contains ... a matrix
@@ -6648,6 +6818,8 @@ ast_struct_specifier::hir(exec_list *instructions,
ir_var_auto,
layout,
0, /* for interface only */
0, /* for interface only */
0, /* for interface only */
expl_location,
0 /* for interface only */);
@@ -6807,6 +6979,29 @@ ast_interface_block::hir(exec_list *instructions,
return NULL;
}
unsigned qual_xfb_buffer;
if (!process_qualifier_constant(state, &loc, "xfb_buffer",
layout.xfb_buffer, &qual_xfb_buffer) ||
!validate_xfb_buffer_qualifier(&loc, state, qual_xfb_buffer)) {
return NULL;
}
unsigned qual_xfb_offset;
if (layout.flags.q.explicit_xfb_offset) {
if (!process_qualifier_constant(state, &loc, "xfb_offset",
layout.offset, &qual_xfb_offset)) {
return NULL;
}
}
unsigned qual_xfb_stride;
if (layout.flags.q.explicit_xfb_stride) {
if (!process_qualifier_constant(state, &loc, "xfb_stride",
layout.xfb_stride, &qual_xfb_stride)) {
return NULL;
}
}
unsigned expl_location = 0;
if (layout.flags.q.explicit_location) {
if (!process_qualifier_constant(state, &loc, "location",
@@ -6842,6 +7037,8 @@ ast_interface_block::hir(exec_list *instructions,
var_mode,
&this->layout,
qual_stream,
qual_xfb_buffer,
qual_xfb_offset,
expl_location,
expl_align);
@@ -6956,6 +7153,12 @@ ast_interface_block::hir(exec_list *instructions,
earlier_per_vertex->fields.structure[j].patch;
fields[i].precision =
earlier_per_vertex->fields.structure[j].precision;
fields[i].explicit_xfb_buffer =
earlier_per_vertex->fields.structure[j].explicit_xfb_buffer;
fields[i].xfb_buffer =
earlier_per_vertex->fields.structure[j].xfb_buffer;
fields[i].xfb_stride =
earlier_per_vertex->fields.structure[j].xfb_stride;
}
}
@@ -6986,6 +7189,12 @@ ast_interface_block::hir(exec_list *instructions,
packing,
this->block_name);
unsigned component_size = block_type->contains_double() ? 8 : 4;
int xfb_offset =
layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
validate_xfb_offset_qualifier(&loc, state, xfb_offset, block_type,
component_size);
if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' "
@@ -7207,8 +7416,17 @@ ast_interface_block::hir(exec_list *instructions,
var->data.patch = fields[i].patch;
var->data.stream = qual_stream;
var->data.location = fields[i].location;
if (fields[i].location != -1)
var->data.explicit_location = true;
var->data.explicit_xfb_buffer = fields[i].explicit_xfb_buffer;
var->data.xfb_buffer = fields[i].xfb_buffer;
if (fields[i].offset != -1)
var->data.explicit_xfb_offset = true;
var->data.offset = fields[i].offset;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+118 -3
View File
@@ -79,7 +79,10 @@ ast_type_qualifier::has_layout() const
|| this->flags.q.explicit_index
|| this->flags.q.explicit_binding
|| this->flags.q.explicit_offset
|| this->flags.q.explicit_stream;
|| this->flags.q.explicit_stream
|| this->flags.q.explicit_xfb_buffer
|| this->flags.q.explicit_xfb_offset
|| this->flags.q.explicit_xfb_stride;
}
bool
@@ -229,6 +232,43 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
}
if (state->has_enhanced_layouts()) {
if (!this->flags.q.explicit_xfb_buffer) {
if (q.flags.q.xfb_buffer) {
this->flags.q.xfb_buffer = 1;
this->xfb_buffer = q.xfb_buffer;
} else if (!this->flags.q.xfb_buffer && this->flags.q.out) {
/* Assign global xfb_buffer value */
this->flags.q.xfb_buffer = 1;
this->xfb_buffer = state->out_qualifier->xfb_buffer;
}
}
if (q.flags.q.explicit_xfb_stride)
this->xfb_stride = q.xfb_stride;
/* Merge all the xfb_stride qualifiers into the global out */
if (q.flags.q.explicit_xfb_stride || this->flags.q.xfb_stride) {
/* Set xfb_stride flag to 0 to avoid adding duplicates every time
* there is a merge.
*/
this->flags.q.xfb_stride = 0;
unsigned buff_idx;
if (process_qualifier_constant(state, loc, "xfb_buffer",
this->xfb_buffer, &buff_idx)) {
if (state->out_qualifier->out_xfb_stride[buff_idx]) {
state->out_qualifier->out_xfb_stride[buff_idx]->merge_qualifier(
new(state) ast_layout_expression(*loc, this->xfb_stride));
} else {
state->out_qualifier->out_xfb_stride[buff_idx] =
new(state) ast_layout_expression(*loc, this->xfb_stride);
}
}
}
}
if (q.flags.q.vertices) {
if (this->vertices) {
this->vertices->merge_qualifier(q.vertices);
@@ -300,7 +340,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
if (q.flags.q.explicit_binding)
this->binding = q.binding;
if (q.flags.q.explicit_offset)
if (q.flags.q.explicit_offset || q.flags.q.explicit_xfb_offset)
this->offset = q.offset;
if (q.precision != ast_precision_none)
@@ -322,6 +362,8 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
{
void *mem_ctx = state;
const bool r = this->merge_qualifier(loc, state, q, false);
ast_type_qualifier valid_out_mask;
valid_out_mask.flags.i = 0;
if (state->stage == MESA_SHADER_GEOMETRY) {
if (q.flags.q.prim_type) {
@@ -340,13 +382,45 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
/* Allow future assignments of global out's stream id value */
this->flags.q.explicit_stream = 0;
valid_out_mask.flags.q.stream = 1;
valid_out_mask.flags.q.explicit_stream = 1;
valid_out_mask.flags.q.explicit_xfb_buffer = 1;
valid_out_mask.flags.q.xfb_buffer = 1;
valid_out_mask.flags.q.explicit_xfb_stride = 1;
valid_out_mask.flags.q.xfb_stride = 1;
valid_out_mask.flags.q.max_vertices = 1;
valid_out_mask.flags.q.prim_type = 1;
} else if (state->stage == MESA_SHADER_TESS_CTRL) {
if (create_node) {
node = new(mem_ctx) ast_tcs_output_layout(*loc);
}
valid_out_mask.flags.q.vertices = 1;
valid_out_mask.flags.q.explicit_xfb_buffer = 1;
valid_out_mask.flags.q.xfb_buffer = 1;
valid_out_mask.flags.q.explicit_xfb_stride = 1;
valid_out_mask.flags.q.xfb_stride = 1;
} else if (state->stage == MESA_SHADER_TESS_EVAL ||
state->stage == MESA_SHADER_VERTEX) {
valid_out_mask.flags.q.explicit_xfb_buffer = 1;
valid_out_mask.flags.q.xfb_buffer = 1;
valid_out_mask.flags.q.explicit_xfb_stride = 1;
valid_out_mask.flags.q.xfb_stride = 1;
} else {
_mesa_glsl_error(loc, state, "out layout qualifiers only valid in "
"tessellation control or geometry shaders");
"geometry, tessellation and vertex shaders");
return false;
}
/* Allow future assignments of the global out's xfb_buffer and xfb_stride */
this->flags.q.explicit_xfb_buffer = 0;
this->flags.q.explicit_xfb_stride = 0;
/* Generate an error when invalid output layout qualifiers are used. */
if ((q.flags.i & ~valid_out_mask.flags.i) != 0) {
_mesa_glsl_error(loc, state,
"invalid output layout qualifiers used");
return false;
}
return r;
@@ -566,3 +640,44 @@ ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state
return true;
}
/**
 * Evaluate a layout-qualifier expression to a non-negative integer constant.
 *
 * \param state             parse state used for the HIR conversion and for
 *                          error reporting
 * \param loc               source location attached to any error raised
 * \param qual_indentifier  qualifier name inserted into error messages
 * \param const_expression  expression to evaluate; may be \c NULL
 * \param value             receives the constant (0 when
 *                          \c const_expression is \c NULL); not written on
 *                          failure
 *
 * \return \c true on success; \c false, after reporting an error, when the
 * expression is not an integral constant expression or its value is
 * negative.
 */
bool
process_qualifier_constant(struct _mesa_glsl_parse_state *state,
                           YYLTYPE *loc,
                           const char *qual_indentifier,
                           ast_expression *const_expression,
                           unsigned *value)
{
   exec_list dummy_instructions;

   if (const_expression == NULL) {
      *value = 0;
      return true;
   }

   ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);

   ir_constant *const const_int = ir->constant_expression_value();
   if (const_int == NULL || !const_int->type->is_integer()) {
      _mesa_glsl_error(loc, state, "%s must be an integral constant "
                       "expression", qual_indentifier);
      return false;
   }

   if (const_int->value.i[0] < 0) {
      /* Report the signed view of the constant: it is the value that was
       * just tested and the one whose type matches the "%d" conversion.
       */
      _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
                       qual_indentifier, const_int->value.i[0]);
      return false;
   }

   /* If the location is const (and we've verified that
    * it is) then no instructions should have been emitted
    * when we converted it to HIR. If they were emitted,
    * then either the location isn't const after all, or
    * we are emitting unnecessary instructions.
    */
   assert(dummy_instructions.is_empty());

   *value = const_int->value.u[0];
   return true;
}
+21 -17
View File
@@ -129,12 +129,6 @@ v130_fs_only(const _mesa_glsl_parse_state *state)
state->stage == MESA_SHADER_FRAGMENT;
}
/* Predicate on the parse state: returns the result of
 * state->is_version(140, 0). It is passed as the first argument to builtin
 * signature constructors such as _textureSize() and _texelFetch().
 * NOTE(review): presumably this means "desktop GLSL 1.40 or later, never
 * GLSL ES" (ES argument of 0) -- confirm against is_version().
 */
static bool
v140(const _mesa_glsl_parse_state *state)
{
return state->is_version(140, 0);
}
static bool
v140_or_es3(const _mesa_glsl_parse_state *state)
{
@@ -183,6 +177,14 @@ v110_lod(const _mesa_glsl_parse_state *state)
return !state->es_shader && lod_exists_in_stage(state);
}
/* Predicate on the parse state: true when state->is_version(140, 320)
 * holds, or when either the EXT or the OES texture_buffer extension is
 * enabled.
 */
static bool
texture_buffer(const _mesa_glsl_parse_state *state)
{
   if (state->is_version(140, 320))
      return true;

   return state->EXT_texture_buffer_enable ||
          state->OES_texture_buffer_enable;
}
static bool
shader_texture_lod(const _mesa_glsl_parse_state *state)
{
@@ -262,10 +264,12 @@ shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state)
}
static bool
fs_gpu_shader5(const _mesa_glsl_parse_state *state)
fs_interpolate_at(const _mesa_glsl_parse_state *state)
{
return state->stage == MESA_SHADER_FRAGMENT &&
(state->is_version(400, 0) || state->ARB_gpu_shader5_enable);
(state->is_version(400, 320) ||
state->ARB_gpu_shader5_enable ||
state->OES_shader_multisample_interpolation_enable);
}
@@ -1581,9 +1585,9 @@ builtin_builder::create_builtins()
_textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2DRect_type),
_textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRectShadow_type),
_textureSize(v140, glsl_type::int_type, glsl_type::samplerBuffer_type),
_textureSize(v140, glsl_type::int_type, glsl_type::isamplerBuffer_type),
_textureSize(v140, glsl_type::int_type, glsl_type::usamplerBuffer_type),
_textureSize(texture_buffer, glsl_type::int_type, glsl_type::samplerBuffer_type),
_textureSize(texture_buffer, glsl_type::int_type, glsl_type::isamplerBuffer_type),
_textureSize(texture_buffer, glsl_type::int_type, glsl_type::usamplerBuffer_type),
_textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::sampler2DMS_type),
_textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type),
_textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type),
@@ -1855,9 +1859,9 @@ builtin_builder::create_builtins()
_texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type),
_texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type),
_texelFetch(v140, glsl_type::vec4_type, glsl_type::samplerBuffer_type, glsl_type::int_type),
_texelFetch(v140, glsl_type::ivec4_type, glsl_type::isamplerBuffer_type, glsl_type::int_type),
_texelFetch(v140, glsl_type::uvec4_type, glsl_type::usamplerBuffer_type, glsl_type::int_type),
_texelFetch(texture_buffer, glsl_type::vec4_type, glsl_type::samplerBuffer_type, glsl_type::int_type),
_texelFetch(texture_buffer, glsl_type::ivec4_type, glsl_type::isamplerBuffer_type, glsl_type::int_type),
_texelFetch(texture_buffer, glsl_type::uvec4_type, glsl_type::usamplerBuffer_type, glsl_type::int_type),
_texelFetch(texture_multisample, glsl_type::vec4_type, glsl_type::sampler2DMS_type, glsl_type::ivec2_type),
_texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
@@ -5163,7 +5167,7 @@ builtin_builder::_interpolateAtCentroid(const glsl_type *type)
{
ir_variable *interpolant = in_var(type, "interpolant");
interpolant->data.must_be_shader_input = 1;
MAKE_SIG(type, fs_gpu_shader5, 1, interpolant);
MAKE_SIG(type, fs_interpolate_at, 1, interpolant);
body.emit(ret(interpolate_at_centroid(interpolant)));
@@ -5176,7 +5180,7 @@ builtin_builder::_interpolateAtOffset(const glsl_type *type)
ir_variable *interpolant = in_var(type, "interpolant");
interpolant->data.must_be_shader_input = 1;
ir_variable *offset = in_var(glsl_type::vec2_type, "offset");
MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset);
MAKE_SIG(type, fs_interpolate_at, 2, interpolant, offset);
body.emit(ret(interpolate_at_offset(interpolant, offset)));
@@ -5189,7 +5193,7 @@ builtin_builder::_interpolateAtSample(const glsl_type *type)
ir_variable *interpolant = in_var(type, "interpolant");
interpolant->data.must_be_shader_input = 1;
ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num");
MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num);
MAKE_SIG(type, fs_interpolate_at, 2, interpolant, sample_num);
body.emit(ret(interpolate_at_sample(interpolant, sample_num)));
+16 -6
View File
@@ -179,7 +179,7 @@ static const struct builtin_type_versions {
T(sampler2DArray, 130, 300)
T(samplerCubeArray, 400, 999)
T(sampler2DRect, 140, 999)
T(samplerBuffer, 140, 999)
T(samplerBuffer, 140, 320)
T(sampler2DMS, 150, 310)
T(sampler2DMSArray, 150, 999)
@@ -191,7 +191,7 @@ static const struct builtin_type_versions {
T(isampler2DArray, 130, 300)
T(isamplerCubeArray, 400, 999)
T(isampler2DRect, 140, 999)
T(isamplerBuffer, 140, 999)
T(isamplerBuffer, 140, 320)
T(isampler2DMS, 150, 310)
T(isampler2DMSArray, 150, 999)
@@ -203,7 +203,7 @@ static const struct builtin_type_versions {
T(usampler2DArray, 130, 300)
T(usamplerCubeArray, 400, 999)
T(usampler2DRect, 140, 999)
T(usamplerBuffer, 140, 999)
T(usamplerBuffer, 140, 320)
T(usampler2DMS, 150, 310)
T(usampler2DMSArray, 150, 999)
@@ -222,7 +222,7 @@ static const struct builtin_type_versions {
T(image3D, 420, 310)
T(image2DRect, 420, 999)
T(imageCube, 420, 310)
T(imageBuffer, 420, 999)
T(imageBuffer, 420, 320)
T(image1DArray, 420, 999)
T(image2DArray, 420, 310)
T(imageCubeArray, 420, 999)
@@ -233,7 +233,7 @@ static const struct builtin_type_versions {
T(iimage3D, 420, 310)
T(iimage2DRect, 420, 999)
T(iimageCube, 420, 310)
T(iimageBuffer, 420, 999)
T(iimageBuffer, 420, 320)
T(iimage1DArray, 420, 999)
T(iimage2DArray, 420, 310)
T(iimageCubeArray, 420, 999)
@@ -244,7 +244,7 @@ static const struct builtin_type_versions {
T(uimage3D, 420, 310)
T(uimage2DRect, 420, 999)
T(uimageCube, 420, 310)
T(uimageBuffer, 420, 999)
T(uimageBuffer, 420, 320)
T(uimage1DArray, 420, 999)
T(uimage2DArray, 420, 310)
T(uimageCubeArray, 420, 999)
@@ -371,6 +371,16 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
add_type(symbols, glsl_type::uimage2DMSArray_type);
}
if (state->EXT_texture_buffer_enable || state->OES_texture_buffer_enable) {
add_type(symbols, glsl_type::samplerBuffer_type);
add_type(symbols, glsl_type::isamplerBuffer_type);
add_type(symbols, glsl_type::usamplerBuffer_type);
add_type(symbols, glsl_type::imageBuffer_type);
add_type(symbols, glsl_type::iimageBuffer_type);
add_type(symbols, glsl_type::uimageBuffer_type);
}
if (state->has_atomic_counters()) {
add_type(symbols, glsl_type::atomic_uint_type);
}
+23 -3
View File
@@ -334,6 +334,9 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].image_coherent = 0;
this->fields[this->num_fields].image_volatile = 0;
this->fields[this->num_fields].image_restrict = 0;
this->fields[this->num_fields].explicit_xfb_buffer = 0;
this->fields[this->num_fields].xfb_buffer = -1;
this->fields[this->num_fields].xfb_stride = -1;
this->num_fields++;
}
@@ -812,6 +815,13 @@ builtin_variable_generator::generate_constants()
*/
}
if (state->has_enhanced_layouts()) {
add_const("gl_MaxTransformFeedbackBuffers",
state->Const.MaxTransformFeedbackBuffers);
add_const("gl_MaxTransformFeedbackInterleavedComponents",
state->Const.MaxTransformFeedbackInterleavedComponents);
}
if (state->is_version(420, 310) ||
state->ARB_shader_image_load_store_enable) {
add_const("gl_MaxImageUnits",
@@ -868,6 +878,10 @@ builtin_variable_generator::generate_constants()
add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents);
add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents);
}
if (state->is_version(450, 320) ||
state->OES_sample_variables_enable)
add_const("gl_MaxSamples", state->Const.MaxSamples);
}
@@ -877,7 +891,9 @@ builtin_variable_generator::generate_constants()
void
builtin_variable_generator::generate_uniforms()
{
if (state->is_version(400, 0) || state->ARB_sample_shading_enable)
if (state->is_version(400, 320) ||
state->ARB_sample_shading_enable ||
state->OES_sample_variables_enable)
add_uniform(int_t, "gl_NumSamples");
add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange");
add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA");
@@ -1130,7 +1146,9 @@ builtin_variable_generator::generate_fs_special_vars()
var->enable_extension_warning("GL_AMD_shader_stencil_export");
}
if (state->is_version(400, 0) || state->ARB_sample_shading_enable) {
if (state->is_version(400, 320) ||
state->ARB_sample_shading_enable ||
state->OES_sample_variables_enable) {
add_system_value(SYSTEM_VALUE_SAMPLE_ID, int_t, "gl_SampleID");
add_system_value(SYSTEM_VALUE_SAMPLE_POS, vec2_t, "gl_SamplePosition");
/* From the ARB_sample_shading specification:
@@ -1143,7 +1161,9 @@ builtin_variable_generator::generate_fs_special_vars()
add_output(FRAG_RESULT_SAMPLE_MASK, array(int_t, 1), "gl_SampleMask");
}
if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) {
if (state->is_version(400, 320) ||
state->ARB_gpu_shader5_enable ||
state->OES_sample_variables_enable) {
add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn");
}
+8
View File
@@ -2371,6 +2371,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
if (extensions != NULL) {
if (extensions->OES_EGL_image_external)
add_builtin_define(parser, "GL_OES_EGL_image_external", 1);
if (extensions->OES_sample_variables) {
add_builtin_define(parser, "GL_OES_sample_variables", 1);
add_builtin_define(parser, "GL_OES_shader_multisample_interpolation", 1);
}
if (extensions->OES_standard_derivatives)
add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
if (extensions->ARB_texture_multisample)
@@ -2390,6 +2394,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_EXT_gpu_shader5", 1);
add_builtin_define(parser, "GL_OES_gpu_shader5", 1);
}
if (extensions->OES_texture_buffer) {
add_builtin_define(parser, "GL_EXT_texture_buffer", 1);
add_builtin_define(parser, "GL_OES_texture_buffer", 1);
}
}
}
} else {
+14 -7
View File
@@ -369,7 +369,7 @@ image2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_l
image3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D);
image2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT);
imageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE);
imageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER);
imageBuffer KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, IMAGEBUFFER);
image1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY);
image2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY);
imageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY);
@@ -380,7 +380,7 @@ iimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_l
iimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D);
iimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT);
iimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE);
iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER);
iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, IIMAGEBUFFER);
iimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY);
iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY);
iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY);
@@ -391,7 +391,7 @@ uimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_l
uimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D);
uimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT);
uimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE);
uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER);
uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, UIMAGEBUFFER);
uimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY);
uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY);
uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY);
@@ -472,6 +472,13 @@ layout {
\.[0-9]+([eE][+-]?[0-9]+)?[fF]? |
[0-9]+\.([eE][+-]?[0-9]+)?[fF]? |
[0-9]+[eE][+-]?[0-9]+[fF]? {
struct _mesa_glsl_parse_state *state = yyextra;
char suffix = yytext[strlen(yytext) - 1];
if (!state->is_version(120, 300) &&
(suffix == 'f' || suffix == 'F')) {
_mesa_glsl_error(yylloc, state,
"Float suffixes are invalid in GLSL 1.10");
}
yylval->real = _mesa_strtof(yytext, NULL);
return FLOATCONSTANT;
}
@@ -565,19 +572,19 @@ common KEYWORD(130, 300, 0, 0, COMMON);
partition KEYWORD(130, 300, 0, 0, PARTITION);
active KEYWORD(130, 300, 0, 0, ACTIVE);
superp KEYWORD(130, 100, 0, 0, SUPERP);
samplerBuffer KEYWORD(130, 300, 140, 0, SAMPLERBUFFER);
samplerBuffer KEYWORD_WITH_ALT(130, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, SAMPLERBUFFER);
filter KEYWORD(130, 300, 0, 0, FILTER);
row_major KEYWORD_WITH_ALT(130, 0, 140, 0, yyextra->ARB_uniform_buffer_object_enable && !yyextra->es_shader, ROW_MAJOR);
/* Additional reserved words in GLSL 1.40 */
isampler2DRect KEYWORD(140, 300, 140, 0, ISAMPLER2DRECT);
usampler2DRect KEYWORD(140, 300, 140, 0, USAMPLER2DRECT);
isamplerBuffer KEYWORD(140, 300, 140, 0, ISAMPLERBUFFER);
usamplerBuffer KEYWORD(140, 300, 140, 0, USAMPLERBUFFER);
isamplerBuffer KEYWORD_WITH_ALT(140, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, ISAMPLERBUFFER);
usamplerBuffer KEYWORD_WITH_ALT(140, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, USAMPLERBUFFER);
/* Additional reserved words in GLSL ES 3.00 */
resource KEYWORD(0, 300, 0, 0, RESOURCE);
sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE);
sample KEYWORD_WITH_ALT(400, 300, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->OES_shader_multisample_interpolation_enable, SAMPLE);
subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
+25
View File
@@ -1541,6 +1541,25 @@ layout_qualifier_id:
}
}
if (state->has_enhanced_layouts()) {
if (match_layout_qualifier("xfb_buffer", $1, state) == 0) {
$$.flags.q.xfb_buffer = 1;
$$.flags.q.explicit_xfb_buffer = 1;
$$.xfb_buffer = $3;
}
if (match_layout_qualifier("xfb_offset", $1, state) == 0) {
$$.flags.q.explicit_xfb_offset = 1;
$$.offset = $3;
}
if (match_layout_qualifier("xfb_stride", $1, state) == 0) {
$$.flags.q.xfb_stride = 1;
$$.flags.q.explicit_xfb_stride = 1;
$$.xfb_stride = $3;
}
}
static const char * const local_size_qualifiers[3] = {
"local_size_x",
"local_size_y",
@@ -1915,6 +1934,12 @@ storage_qualifier:
$$.flags.q.explicit_stream = 0;
$$.stream = state->out_qualifier->stream;
}
if (state->has_enhanced_layouts()) {
$$.flags.q.xfb_buffer = 1;
$$.flags.q.explicit_xfb_buffer = 0;
$$.xfb_buffer = state->out_qualifier->xfb_buffer;
}
}
| UNIFORM
{
+34 -5
View File
@@ -140,6 +140,10 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxAtomicCounterBufferSize =
ctx->Const.MaxAtomicBufferSize;
/* ARB_enhanced_layouts constants */
this->Const.MaxTransformFeedbackBuffers = ctx->Const.MaxTransformFeedbackBuffers;
this->Const.MaxTransformFeedbackInterleavedComponents = ctx->Const.MaxTransformFeedbackInterleavedComponents;
/* Compute shader constants */
for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++)
this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i];
@@ -177,6 +181,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents;
this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents;
/* GL 4.5 / OES_sample_variables */
this->Const.MaxSamples = ctx->Const.MaxSamples;
this->current_function = NULL;
this->toplevel_ir = NULL;
this->found_return = false;
@@ -610,9 +617,12 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_gpu_shader5, false, true, ARB_gpu_shader5),
EXT(OES_sample_variables, false, true, OES_sample_variables),
EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store),
EXT(OES_shader_multisample_interpolation, false, true, OES_sample_variables),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
EXT(OES_texture_buffer, false, true, OES_texture_buffer),
EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
/* All other extensions go here, sorted alphabetically.
@@ -629,6 +639,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix),
EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical),
EXT(EXT_texture_array, true, false, EXT_texture_array),
EXT(EXT_texture_buffer, false, true, OES_texture_buffer),
};
#undef EXT
@@ -935,6 +946,13 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
block->layout.stream = state->out_qualifier->stream;
}
if (state->has_enhanced_layouts() && block->layout.flags.q.out) {
/* Assign global layout's xfb_buffer value. */
block->layout.flags.q.xfb_buffer = 1;
block->layout.flags.q.explicit_xfb_buffer = 0;
block->layout.xfb_buffer = state->out_qualifier->xfb_buffer;
}
foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
ast_type_qualifier& qualifier = member->type->qualifier;
if ((qualifier.flags.i & interface_type_mask) == 0) {
@@ -1206,6 +1224,7 @@ ast_expression::ast_expression(int oper,
this->subexpressions[1] = ex1;
this->subexpressions[2] = ex2;
this->non_lvalue_description = NULL;
this->is_lhs = false;
}
@@ -1583,13 +1602,12 @@ set_shader_inout_layout(struct gl_shader *shader,
struct _mesa_glsl_parse_state *state)
{
/* Should have been prevented by the parser. */
if (shader->Stage == MESA_SHADER_TESS_CTRL) {
if (shader->Stage == MESA_SHADER_TESS_CTRL ||
shader->Stage == MESA_SHADER_VERTEX) {
assert(!state->in_qualifier->flags.i);
} else if (shader->Stage == MESA_SHADER_TESS_EVAL) {
assert(!state->out_qualifier->flags.i);
} else if (shader->Stage != MESA_SHADER_GEOMETRY) {
} else if (shader->Stage != MESA_SHADER_GEOMETRY &&
shader->Stage != MESA_SHADER_TESS_EVAL) {
assert(!state->in_qualifier->flags.i);
assert(!state->out_qualifier->flags.i);
}
if (shader->Stage != MESA_SHADER_COMPUTE) {
@@ -1606,6 +1624,17 @@ set_shader_inout_layout(struct gl_shader *shader,
assert(!state->fs_early_fragment_tests);
}
for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) {
if (state->out_qualifier->out_xfb_stride[i]) {
unsigned xfb_stride;
if (state->out_qualifier->out_xfb_stride[i]->
process_qualifier_constant(state, "xfb_stride", &xfb_stride,
true)) {
shader->TransformFeedback.BufferStride[i] = xfb_stride;
}
}
}
switch (shader->Stage) {
case MESA_SHADER_TESS_CTRL:
shader->TessCtrl.VerticesOut = 0;
+15
View File
@@ -383,6 +383,10 @@ struct _mesa_glsl_parse_state {
/* ARB_draw_buffers */
unsigned MaxDrawBuffers;
/* ARB_enhanced_layouts */
unsigned MaxTransformFeedbackBuffers;
unsigned MaxTransformFeedbackInterleavedComponents;
/* ARB_blend_func_extended */
unsigned MaxDualSourceDrawBuffers;
@@ -457,6 +461,9 @@ struct _mesa_glsl_parse_state {
unsigned MaxTessControlTotalOutputComponents;
unsigned MaxTessControlUniformComponents;
unsigned MaxTessEvaluationUniformComponents;
/* GL 4.5 / OES_sample_variables */
unsigned MaxSamples;
} Const;
/**
@@ -597,12 +604,18 @@ struct _mesa_glsl_parse_state {
bool OES_geometry_shader_warn;
bool OES_gpu_shader5_enable;
bool OES_gpu_shader5_warn;
bool OES_sample_variables_enable;
bool OES_sample_variables_warn;
bool OES_shader_image_atomic_enable;
bool OES_shader_image_atomic_warn;
bool OES_shader_multisample_interpolation_enable;
bool OES_shader_multisample_interpolation_warn;
bool OES_standard_derivatives_enable;
bool OES_standard_derivatives_warn;
bool OES_texture_3D_enable;
bool OES_texture_3D_warn;
bool OES_texture_buffer_enable;
bool OES_texture_buffer_warn;
bool OES_texture_storage_multisample_2d_array_enable;
bool OES_texture_storage_multisample_2d_array_warn;
@@ -632,6 +645,8 @@ struct _mesa_glsl_parse_state {
bool EXT_shader_samples_identical_warn;
bool EXT_texture_array_enable;
bool EXT_texture_array_warn;
bool EXT_texture_buffer_enable;
bool EXT_texture_buffer_warn;
/*@}*/
/** Extensions supported by the OpenGL implementation. */
+28 -15
View File
@@ -726,6 +726,21 @@ public:
*/
unsigned is_xfb_only:1;
/**
* Was a transform feedback buffer set in the shader?
*/
unsigned explicit_xfb_buffer:1;
/**
* Was a transform feedback offset set in the shader?
*/
unsigned explicit_xfb_offset:1;
/**
* Was a transform feedback stride set in the shader?
*/
unsigned explicit_xfb_stride:1;
/**
* If non-zero, then this variable may be packed along with other variables
* into a single varying slot, so this offset should be applied when
@@ -742,21 +757,9 @@ public:
/**
* Non-zero if this variable was created by lowering a named interface
* block which was not an array.
*
* Note that this variable and \c from_named_ifc_block_array will never
* both be non-zero.
* block.
*/
unsigned from_named_ifc_block_nonarray:1;
/**
* Non-zero if this variable was created by lowering a named interface
* block which was an array.
*
* Note that this variable and \c from_named_ifc_block_nonarray will never
* both be non-zero.
*/
unsigned from_named_ifc_block_array:1;
unsigned from_named_ifc_block:1;
/**
* Non-zero if the variable must be a shader input. This is useful for
@@ -873,7 +876,7 @@ public:
unsigned stream;
/**
* Atomic or block member offset.
* Atomic, transform feedback or block member offset.
*/
unsigned offset;
@@ -884,6 +887,16 @@ public:
*/
unsigned max_array_access;
/**
* Transform feedback buffer.
*/
unsigned xfb_buffer;
/**
* Transform feedback stride.
*/
unsigned xfb_stride;
/**
* Allow (only) ir_variable direct access private members.
*/
-5
View File
@@ -105,11 +105,6 @@ struct gl_uniform_storage {
*/
unsigned array_elements;
/**
* Has this uniform ever been set?
*/
bool initialized;
struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
/**
+4 -2
View File
@@ -242,7 +242,8 @@ public:
return entry ? (ir_variable *) entry->data : NULL;
} else {
const struct hash_entry *entry =
_mesa_hash_table_search(ht, var->get_interface_type()->name);
_mesa_hash_table_search(ht,
var->get_interface_type()->without_array()->name);
return entry ? (ir_variable *) entry->data : NULL;
}
}
@@ -263,7 +264,8 @@ public:
snprintf(location_str, 11, "%d", var->data.location);
_mesa_hash_table_insert(ht, ralloc_strdup(mem_ctx, location_str), var);
} else {
_mesa_hash_table_insert(ht, var->get_interface_type()->name, var);
_mesa_hash_table_insert(ht,
var->get_interface_type()->without_array()->name, var);
}
}
@@ -162,8 +162,6 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
}
}
}
storage->initialized = true;
}
}
@@ -183,7 +181,7 @@ set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
if (stage_index != -1) {
struct gl_shader *sh = prog->_LinkedShaders[i];
sh->BufferInterfaceBlocks[stage_index].Binding = binding;
sh->BufferInterfaceBlocks[stage_index]->Binding = binding;
}
}
}
@@ -267,8 +265,6 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
}
}
}
storage->initialized = true;
}
}
+31 -77
View File
@@ -68,7 +68,7 @@ program_resource_visitor::process(const glsl_type *type, const char *name)
unsigned packing = type->interface_packing;
recursion(type, &name_copy, strlen(name), false, NULL, packing, false,
record_array_count);
record_array_count, NULL);
ralloc_free(name_copy);
}
@@ -76,8 +76,6 @@ void
program_resource_visitor::process(ir_variable *var)
{
unsigned record_array_count = 1;
const glsl_type *t = var->type;
const glsl_type *t_without_array = var->type->without_array();
const bool row_major =
var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
@@ -85,80 +83,28 @@ program_resource_visitor::process(ir_variable *var)
var->get_interface_type()->interface_packing :
var->type->interface_packing;
const glsl_type *t =
var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
const glsl_type *t_without_array = t->without_array();
/* false is always passed for the row_major parameter to the other
* processing functions because no information is available to do
* otherwise. See the warning in linker.h.
*/
/* Only strdup the name if we actually will need to modify it. */
if (var->data.from_named_ifc_block_array) {
/* lower_named_interface_blocks created this variable by lowering an
* interface block array to an array variable. For example if the
* original source code was:
*
* out Blk { vec4 bar } foo[3];
*
* Then the variable is now:
*
* out vec4 bar[3];
*
* We need to visit each array element using the names constructed like
* so:
*
* Blk[0].bar
* Blk[1].bar
* Blk[2].bar
*/
assert(t->is_array());
const glsl_type *ifc_type = var->get_interface_type();
char *name = ralloc_strdup(NULL, ifc_type->name);
size_t name_length = strlen(name);
for (unsigned i = 0; i < t->length; i++) {
size_t new_length = name_length;
ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i,
var->name);
/* Note: row_major is only meaningful for uniform blocks, and
* lowering is only applied to non-uniform interface blocks, so we
* can safely pass false for row_major.
*/
recursion(var->type, &name, new_length, row_major, NULL, packing,
false, record_array_count);
}
ralloc_free(name);
} else if (var->data.from_named_ifc_block_nonarray) {
/* lower_named_interface_blocks created this variable by lowering a
* named interface block (non-array) to an ordinary variable. For
* example if the original source code was:
*
* out Blk { vec4 bar } foo;
*
* Then the variable is now:
*
* out vec4 bar;
*
* We need to visit this variable using the name:
*
* Blk.bar
*/
const glsl_type *ifc_type = var->get_interface_type();
char *name = ralloc_asprintf(NULL, "%s.%s", ifc_type->name, var->name);
/* Note: row_major is only meaningful for uniform blocks, and lowering
* is only applied to non-uniform interface blocks, so we can safely
* pass false for row_major.
*/
recursion(var->type, &name, strlen(name), row_major, NULL, packing,
false, record_array_count);
ralloc_free(name);
} else if (t_without_array->is_record() ||
if (t_without_array->is_record() ||
(t->is_array() && t->fields.array->is_array())) {
char *name = ralloc_strdup(NULL, var->name);
recursion(var->type, &name, strlen(name), row_major, NULL, packing,
false, record_array_count);
false, record_array_count, NULL);
ralloc_free(name);
} else if (t_without_array->is_interface()) {
char *name = ralloc_strdup(NULL, t_without_array->name);
recursion(var->type, &name, strlen(name), row_major, NULL, packing,
false, record_array_count);
const glsl_struct_field *ifc_member = var->data.from_named_ifc_block ?
&t_without_array->
fields.structure[t_without_array->field_index(var->name)] : NULL;
recursion(t, &name, strlen(name), row_major, NULL, packing,
false, record_array_count, ifc_member);
ralloc_free(name);
} else {
this->set_record_array_count(record_array_count);
@@ -172,7 +118,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
const glsl_type *record_type,
const unsigned packing,
bool last_field,
unsigned record_array_count)
unsigned record_array_count,
const glsl_struct_field *named_ifc_member)
{
/* Records need to have each field processed individually.
*
@@ -180,7 +127,12 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
* individually, then each field of the resulting array elements processed
* individually.
*/
if (t->is_record() || t->is_interface()) {
if (t->is_interface() && named_ifc_member) {
ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
named_ifc_member->name);
recursion(named_ifc_member->type, name, name_length, row_major, NULL,
packing, false, record_array_count, NULL);
} else if (t->is_record() || t->is_interface()) {
if (record_type == NULL && t->is_record())
record_type = t;
@@ -223,7 +175,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
field_row_major,
record_type,
packing,
(i + 1) == t->length, record_array_count);
(i + 1) == t->length, record_array_count, NULL);
/* Only the first leaf-field of the record gets called with the
* record type pointer.
@@ -258,7 +210,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
recursion(t->fields.array, name, new_length, row_major,
record_type,
packing,
(i + 1) == t->length, record_array_count);
(i + 1) == t->length, record_array_count,
named_ifc_member);
/* Only the first leaf-field of the record gets called with the
* record type pointer.
@@ -799,7 +752,6 @@ private:
this->uniforms[id].name = ralloc_strdup(this->uniforms, name);
this->uniforms[id].type = base_type;
this->uniforms[id].initialized = 0;
this->uniforms[id].num_driver_storage = 0;
this->uniforms[id].driver_storage = NULL;
this->uniforms[id].atomic_buffer_index = -1;
@@ -954,6 +906,8 @@ link_cross_validate_uniform_block(void *mem_ctx,
new_block->Uniforms,
sizeof(*linked_block->Uniforms) * linked_block->NumUniforms);
linked_block->Name = ralloc_strdup(*linked_blocks, linked_block->Name);
for (unsigned int i = 0; i < linked_block->NumUniforms; i++) {
struct gl_uniform_buffer_variable *ubo_var =
&linked_block->Uniforms[i];
@@ -1005,9 +959,9 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
const unsigned l = strlen(var->name);
for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) {
for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i]->NumUniforms; j++) {
if (sentinel) {
const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name;
const char *begin = shader->BufferInterfaceBlocks[i]->Uniforms[j].Name;
const char *end = strchr(begin, sentinel);
if (end == NULL)
@@ -1022,7 +976,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
break;
}
} else if (!strcmp(var->name,
shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) {
shader->BufferInterfaceBlocks[i]->Uniforms[j].Name)) {
found = true;
var->data.location = j;
break;
@@ -1148,9 +1102,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
sh->num_combined_uniform_components = sh->num_uniform_components;
for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) {
if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) {
if (!sh->BufferInterfaceBlocks[i]->IsShaderStorage) {
sh->num_combined_uniform_components +=
sh->BufferInterfaceBlocks[i].UniformBufferSize / 4;
sh->BufferInterfaceBlocks[i]->UniformBufferSize / 4;
}
}
}
+278 -39
View File
@@ -63,6 +63,125 @@ get_varying_type(const ir_variable *var, gl_shader_stage stage)
return type;
}
static void
create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
size_t name_length, unsigned *count,
const char *ifc_member_name,
const glsl_type *ifc_member_t, char ***varying_names)
{
if (t->is_interface()) {
size_t new_length = name_length;
assert(ifc_member_name && ifc_member_t);
ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
NULL, NULL, varying_names);
} else if (t->is_record()) {
for (unsigned i = 0; i < t->length; i++) {
const char *field = t->fields.structure[i].name;
size_t new_length = name_length;
ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
new_length, count, NULL, NULL,
varying_names);
}
} else if (t->without_array()->is_record() ||
t->without_array()->is_interface() ||
(t->is_array() && t->fields.array->is_array())) {
for (unsigned i = 0; i < t->length; i++) {
size_t new_length = name_length;
/* Append the subscript to the current variable name */
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
count, ifc_member_name, ifc_member_t,
varying_names);
}
} else {
(*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
}
}
/**
 * Scan a shader for ARB_enhanced_layouts xfb_* layout qualifiers.
 *
 * Every shader output carrying an explicit xfb_offset contributes
 * \c var->type->varying_count() entries to \c *num_tfeedback_decls.  If the
 * resulting count is non-zero, \c *varying_names is allocated from
 * \c mem_ctx and filled with the name of each captured varying.
 *
 * \param mem_ctx              ralloc context owning the name array and names
 * \param sh                   shader whose IR and stride table are inspected
 * \param num_tfeedback_decls  in/out count of captured varyings; the final
 *                             assert compares it against the number of
 *                             names written here, so callers are presumably
 *                             expected to pass it in as zero
 * \param varying_names        out: array of generated varying names (only
 *                             written when the count is non-zero)
 *
 * \return true if the shader uses any xfb_* qualifier: a non-zero buffer
 *         stride, or an output with an explicit xfb_buffer, xfb_stride or
 *         xfb_offset.
 */
bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh,
                              unsigned *num_tfeedback_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also we don't bother to propagate
    * xfb_stride to interface block members so this will catch that case also.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (sh->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
      }
   }

   /* First pass: detect xfb qualifiers and count the captured varyings. */
   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (!var || var->data.mode != ir_var_shader_out)
         continue;

      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_xfb_offset) {
         *num_tfeedback_decls += var->type->varying_count();
         has_xfb_qualifiers = true;
      }
   }

   if (*num_tfeedback_decls == 0)
      return has_xfb_qualifiers;

   /* Second pass: build the name of every captured varying. */
   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (!var || var->data.mode != ir_var_shader_out)
         continue;

      if (var->data.explicit_xfb_offset) {
         char *name;
         const glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            type = var->get_interface_type();

            /* The interface type may be an array of blocks; the member list
             * and the block name live on the element type, so strip the
             * array before looking the member up.  without_array() is a
             * no-op for a non-array block.
             */
            const glsl_type *ifc_type = type->without_array();

            /* Find the member type before it was altered by lowering */
            member_type =
               ifc_type->fields.structure[ifc_type->field_index(var->name)].type;
            name = ralloc_strdup(NULL, ifc_type->name);
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }

         /* name is a scratch buffer that the helper rewrites in place; the
          * strings stored in varying_names are separate copies.
          */
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         ralloc_free(name);
      }
   }

   assert(i == *num_tfeedback_decls);
   return has_xfb_qualifiers;
}
/**
* Validate the types and qualifiers of an output from one stage against the
* matching input to another stage.
@@ -397,6 +516,8 @@ tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
this->next_buffer_separator = false;
this->matched_candidate = NULL;
this->stream_id = 0;
this->buffer = 0;
this->offset = 0;
if (ctx->Extensions.ARB_transform_feedback3) {
/* Parse gl_NextBuffer. */
@@ -489,6 +610,8 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
= this->matched_candidate->toplevel_var->data.location * 4
+ this->matched_candidate->toplevel_var->data.location_frac
+ this->matched_candidate->offset;
const unsigned dmul =
this->matched_candidate->type->without_array()->is_double() ? 2 : 1;
if (this->matched_candidate->type->is_array()) {
/* Array variable */
@@ -496,8 +619,6 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
this->matched_candidate->type->fields.array->matrix_columns;
const unsigned vector_elements =
this->matched_candidate->type->fields.array->vector_elements;
const unsigned dmul =
this->matched_candidate->type->fields.array->is_double() ? 2 : 1;
unsigned actual_array_size;
switch (this->lowered_builtin_array_variable) {
case clip_distance:
@@ -575,6 +696,12 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
*/
this->stream_id = this->matched_candidate->toplevel_var->data.stream;
unsigned array_offset = this->array_subscript * 4 * dmul;
unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
this->offset = this->matched_candidate->toplevel_var->data.offset +
array_offset + struct_offset;
return true;
}
@@ -598,55 +725,108 @@ tfeedback_decl::get_num_outputs() const
bool
tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
struct gl_transform_feedback_info *info,
unsigned buffer, const unsigned max_outputs) const
unsigned buffer, unsigned buffer_index,
const unsigned max_outputs, bool *explicit_stride,
bool has_xfb_qualifiers) const
{
assert(!this->next_buffer_separator);
/* Handle gl_SkipComponents. */
if (this->skip_components) {
info->BufferStride[buffer] += this->skip_components;
info->Buffers[buffer].Stride += this->skip_components;
return true;
}
unsigned xfb_offset = 0;
if (has_xfb_qualifiers) {
xfb_offset = this->offset / 4;
} else {
xfb_offset = info->Buffers[buffer].Stride;
}
info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
unsigned location = this->location;
unsigned location_frac = this->location_frac;
unsigned num_components = this->num_components();
while (num_components > 0) {
unsigned output_size = MIN2(num_components, 4 - location_frac);
assert((info->NumOutputs == 0 && max_outputs == 0) ||
info->NumOutputs < max_outputs);
/* From the ARB_enhanced_layouts spec:
*
* "If such a block member or variable is not written during a shader
* invocation, the buffer contents at the assigned offset will be
* undefined. Even if there are no static writes to a variable or
* member that is assigned a transform feedback offset, the space is
* still allocated in the buffer and still affects the stride."
*/
if (this->is_varying_written()) {
info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
info->Outputs[info->NumOutputs].OutputRegister = location;
info->Outputs[info->NumOutputs].NumComponents = output_size;
info->Outputs[info->NumOutputs].StreamId = stream_id;
info->Outputs[info->NumOutputs].OutputBuffer = buffer;
info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
++info->NumOutputs;
}
info->Buffers[buffer].Stream = this->stream_id;
xfb_offset += output_size;
num_components -= output_size;
location++;
location_frac = 0;
}
if (explicit_stride && explicit_stride[buffer]) {
if (this->is_double() && info->Buffers[buffer].Stride % 2) {
linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
"multiple of 8 as its applied to a type that is or "
"contains a double.",
info->Buffers[buffer].Stride * 4);
return false;
}
if ((this->offset / 4) / info->Buffers[buffer].Stride !=
(xfb_offset - 1) / info->Buffers[buffer].Stride) {
linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
"buffer (%d)", xfb_offset * 4,
info->Buffers[buffer].Stride * 4, buffer);
return false;
}
} else {
info->Buffers[buffer].Stride = xfb_offset;
}
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
*
* * the total number of components to capture is greater than
* the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
* and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
*
* From GL_ARB_enhanced_layouts:
*
* "The resulting stride (implicit or explicit) must be less than or
* equal to the implementation-dependent constant
* gl_MaxTransformFeedbackInterleavedComponents."
*/
if (prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS &&
info->BufferStride[buffer] + this->num_components() >
if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
has_xfb_qualifiers) &&
info->Buffers[buffer].Stride >
ctx->Const.MaxTransformFeedbackInterleavedComponents) {
linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
"limit has been exceeded.");
return false;
}
unsigned location = this->location;
unsigned location_frac = this->location_frac;
unsigned num_components = this->num_components();
while (num_components > 0) {
unsigned output_size = MIN2(num_components, 4 - location_frac);
assert(info->NumOutputs < max_outputs);
info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
info->Outputs[info->NumOutputs].OutputRegister = location;
info->Outputs[info->NumOutputs].NumComponents = output_size;
info->Outputs[info->NumOutputs].StreamId = stream_id;
info->Outputs[info->NumOutputs].OutputBuffer = buffer;
info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer];
++info->NumOutputs;
info->BufferStride[buffer] += output_size;
info->BufferStream[buffer] = this->stream_id;
num_components -= output_size;
location++;
location_frac = 0;
}
info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name);
info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
this->orig_name);
info->Varyings[info->NumVarying].Type = this->type;
info->Varyings[info->NumVarying].Size = this->size;
info->Varyings[info->NumVarying].BufferIndex = buffer_index;
info->NumVarying++;
info->Buffers[buffer].NumVaryings++;
return true;
}
@@ -731,6 +911,17 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
}
static int
cmp_xfb_offset(const void * x_generic, const void * y_generic)
{
tfeedback_decl *x = (tfeedback_decl *) x_generic;
tfeedback_decl *y = (tfeedback_decl *) y_generic;
if (x->get_buffer() != y->get_buffer())
return x->get_buffer() - y->get_buffer();
return x->get_offset() - y->get_offset();
}
/**
* Store transform feedback location assignments into
* prog->LinkedTransformFeedback based on the data stored in tfeedback_decls.
@@ -741,8 +932,13 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls)
tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
{
/* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
* tracking the number of buffers doesn't overflow.
*/
assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
bool separate_attribs_mode =
prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
@@ -752,14 +948,24 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
memset(&prog->LinkedTransformFeedback, 0,
sizeof(prog->LinkedTransformFeedback));
/* The xfb_offset qualifier does not have to be used in increasing order;
* however, some drivers expect to receive the list of transform feedback
* declarations in order, so sort it now for convenience.
*/
if (has_xfb_qualifiers)
qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
cmp_xfb_offset);
prog->LinkedTransformFeedback.Varyings =
rzalloc_array(prog,
struct gl_transform_feedback_varying_info,
num_tfeedback_decls);
unsigned num_outputs = 0;
for (unsigned i = 0; i < num_tfeedback_decls; ++i)
num_outputs += tfeedback_decls[i].get_num_outputs();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (tfeedback_decls[i].is_varying_written())
num_outputs += tfeedback_decls[i].get_num_outputs();
}
prog->LinkedTransformFeedback.Outputs =
rzalloc_array(prog,
@@ -767,21 +973,47 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
num_outputs);
unsigned num_buffers = 0;
unsigned buffers = 0;
if (separate_attribs_mode) {
if (!has_xfb_qualifiers && separate_attribs_mode) {
/* GL_SEPARATE_ATTRIBS */
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback,
num_buffers, num_outputs))
num_buffers, num_buffers, num_outputs,
NULL, has_xfb_qualifiers))
return false;
buffers |= 1 << num_buffers;
num_buffers++;
}
}
else {
/* GL_INTERLEAVED_ATTRIBS */
int buffer_stream_id = -1;
unsigned buffer =
num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
/* Apply any xfb_stride global qualifiers */
if (has_xfb_qualifiers) {
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
if (prog->TransformFeedback.BufferStride[j]) {
buffers |= 1 << j;
explicit_stride[j] = true;
prog->LinkedTransformFeedback.Buffers[j].Stride =
prog->TransformFeedback.BufferStride[j] / 4;
}
}
}
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (has_xfb_qualifiers &&
buffer != tfeedback_decls[i].get_buffer()) {
/* we have moved to the next buffer so reset stream id */
buffer_stream_id = -1;
num_buffers++;
}
if (tfeedback_decls[i].is_next_buffer_separator()) {
num_buffers++;
buffer_stream_id = -1;
@@ -803,17 +1035,24 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
return false;
}
if (has_xfb_qualifiers) {
buffer = tfeedback_decls[i].get_buffer();
} else {
buffer = num_buffers;
}
buffers |= 1 << buffer;
if (!tfeedback_decls[i].store(ctx, prog,
&prog->LinkedTransformFeedback,
num_buffers, num_outputs))
buffer, num_buffers, num_outputs,
explicit_stride, has_xfb_qualifiers))
return false;
}
num_buffers++;
}
assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs);
prog->LinkedTransformFeedback.NumBuffers = num_buffers;
prog->LinkedTransformFeedback.ActiveBuffers = buffers;
return true;
}
@@ -1466,8 +1705,8 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
} else if (input_var->get_interface_type() != NULL) {
char *const iface_field_name =
ralloc_asprintf(mem_ctx, "%s.%s",
input_var->get_interface_type()->name,
input_var->name);
input_var->get_interface_type()->without_array()->name,
input_var->name);
hash_table_insert(consumer_interface_inputs, input_var,
iface_field_name);
} else {
@@ -1498,8 +1737,8 @@ get_matching_input(void *mem_ctx,
} else if (output_var->get_interface_type() != NULL) {
char *const iface_field_name =
ralloc_asprintf(mem_ctx, "%s.%s",
output_var->get_interface_type()->name,
output_var->name);
output_var->get_interface_type()->without_array()->name,
output_var->name);
input_var =
(ir_variable *) hash_table_find(consumer_interface_inputs,
iface_field_name);
+37 -2
View File
@@ -98,7 +98,8 @@ public:
unsigned get_num_outputs() const;
bool store(struct gl_context *ctx, struct gl_shader_program *prog,
struct gl_transform_feedback_info *info, unsigned buffer,
const unsigned max_outputs) const;
unsigned buffer_index, const unsigned max_outputs,
bool *explicit_stride, bool has_xfb_qualifiers) const;
const tfeedback_candidate *find_candidate(gl_shader_program *prog,
hash_table *tfeedback_candidates);
@@ -107,6 +108,14 @@ public:
return this->next_buffer_separator;
}
/**
 * Report the \c assigned flag of the top-level variable behind the matched
 * candidate.  Buffer separators and skip-components entries never count as
 * written.
 *
 * NOTE(review): dereferences matched_candidate, so this presumably must only
 * be called once a candidate has been matched - confirm against callers.
 */
bool is_varying_written() const
{
   if (!this->next_buffer_separator && !this->skip_components)
      return this->matched_candidate->toplevel_var->data.assigned;

   return false;
}
bool is_varying() const
{
return !this->next_buffer_separator && !this->skip_components;
@@ -122,6 +131,16 @@ public:
return this->stream_id;
}
unsigned get_buffer() const
{
return this->buffer;
}
unsigned get_offset() const
{
return this->offset;
}
/**
* The total number of varying components taken up by this variable. Only
* valid if assign_location() has been called.
@@ -201,6 +220,16 @@ private:
*/
int location;
/**
* Used to store the buffer assigned by xfb_buffer.
*/
unsigned buffer;
/**
* Used to store the offset assigned by xfb_offset.
*/
unsigned offset;
/**
* If non-zero, then this variable may be packed along with other variables
* into a single varying slot, so this offset should be applied when
@@ -268,6 +297,11 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
const void *mem_ctx, unsigned num_names,
char **varying_names, tfeedback_decl *decls);
bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh,
unsigned *num_tfeedback_decls,
char ***varying_names);
void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
gl_shader *sh,
@@ -276,7 +310,8 @@ remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls);
tfeedback_decl *tfeedback_decls,
bool has_xfb_qualifiers);
bool
assign_varying_locations(struct gl_context *ctx,
+156 -39
View File
@@ -1192,11 +1192,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
int index = link_cross_validate_uniform_block(prog,
&prog->BufferInterfaceBlocks,
&prog->NumBufferInterfaceBlocks,
&sh->BufferInterfaceBlocks[j]);
sh->BufferInterfaceBlocks[j]);
if (index == -1) {
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
sh->BufferInterfaceBlocks[j].Name);
sh->BufferInterfaceBlocks[j]->Name);
return false;
}
@@ -1204,6 +1204,23 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
}
}
/* Update per stage block pointers to point to the program list.
* FIXME: We should be able to free the per stage blocks here.
*/
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
int stage_index =
prog->InterfaceBlockStageIndex[i][j];
if (stage_index != -1) {
struct gl_shader *sh = prog->_LinkedShaders[i];
sh->BufferInterfaceBlocks[stage_index] =
&prog->BufferInterfaceBlocks[j];
}
}
}
return true;
}
@@ -1567,6 +1584,69 @@ private:
hash_table *unnamed_interfaces;
};
/**
 * Merge the xfb_stride default qualifiers of all shaders of one stage.
 *
 * The per-buffer strides declared by the shaders in \c shader_list are
 * combined into \c linked_shader; two shaders declaring different non-zero
 * strides for the same buffer is a link error.  Every resulting non-zero
 * stride is then copied into \c prog for later use and checked to be a
 * multiple of 4 and within MaxTransformFeedbackInterleavedComponents.
 *
 * Errors are reported through linker_error() and stop processing at once.
 */
static void
link_xfb_stride_layout_qualifiers(struct gl_context *ctx,
                                  struct gl_shader_program *prog,
                                  struct gl_shader *linked_shader,
                                  struct gl_shader **shader_list,
                                  unsigned num_shaders)
{
   /* Start from "no stride declared" for every buffer. */
   for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++)
      linked_shader->TransformFeedback.BufferStride[buf] = 0;

   for (unsigned s = 0; s < num_shaders; s++) {
      struct gl_shader *shader = shader_list[s];

      for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++) {
         /* A shader that declares no stride for this buffer has no say. */
         if (!shader->TransformFeedback.BufferStride[buf])
            continue;

         /* An earlier shader already declared a different stride. */
         if (linked_shader->TransformFeedback.BufferStride[buf] != 0 &&
             linked_shader->TransformFeedback.BufferStride[buf] !=
             shader->TransformFeedback.BufferStride[buf]) {
            linker_error(prog,
                         "intrastage shaders defined with conflicting "
                         "xfb_stride for buffer %d (%d and %d)\n", buf,
                         linked_shader->TransformFeedback.BufferStride[buf],
                         shader->TransformFeedback.BufferStride[buf]);
            return;
         }

         linked_shader->TransformFeedback.BufferStride[buf] =
            shader->TransformFeedback.BufferStride[buf];
      }
   }

   for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++) {
      if (!linked_shader->TransformFeedback.BufferStride[buf])
         continue;

      prog->TransformFeedback.BufferStride[buf] =
         linked_shader->TransformFeedback.BufferStride[buf];

      /* We will validate doubles at a later stage */
      if (prog->TransformFeedback.BufferStride[buf] % 4) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 4 or if its applied to a type that is "
                      "or contains a double a multiple of 8.",
                      prog->TransformFeedback.BufferStride[buf]);
         return;
      }

      /* The limit is compared against the stride divided by 4. */
      if (prog->TransformFeedback.BufferStride[buf] / 4 >
          ctx->Const.MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return;
      }
   }
}
/**
* Performs the cross-validation of tessellation control shader vertices and
@@ -2069,15 +2149,23 @@ link_intrastage_shaders(void *mem_ctx,
linked->ir = new(linked) exec_list;
clone_ir_list(mem_ctx, linked->ir, main->ir);
linked->BufferInterfaceBlocks = uniform_blocks;
linked->BufferInterfaceBlocks =
ralloc_array(linked, gl_uniform_block *, num_uniform_blocks);
ralloc_steal(linked, uniform_blocks);
for (unsigned i = 0; i < num_uniform_blocks; i++) {
linked->BufferInterfaceBlocks[i] = &uniform_blocks[i];
}
linked->NumBufferInterfaceBlocks = num_uniform_blocks;
ralloc_steal(linked, linked->BufferInterfaceBlocks);
link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
link_xfb_stride_layout_qualifiers(ctx, prog, linked, shader_list,
num_shaders);
populate_symbol_table(linked);
@@ -2869,7 +2957,8 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->InterfaceBlockStageIndex[j][i] != -1) {
struct gl_shader *sh = prog->_LinkedShaders[j];
int stage_index = prog->InterfaceBlockStageIndex[j][i];
if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) {
if (sh &&
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage) {
shader_blocks[j]++;
total_shader_storage_blocks++;
} else {
@@ -2986,7 +3075,8 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
int stage_index = prog->InterfaceBlockStageIndex[i][j];
if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage)
if (stage_index != -1 &&
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage)
total_shader_storage_blocks++;
}
@@ -3762,7 +3852,8 @@ write_top_level_array_size_and_stride:
* resource data.
*/
void
build_program_resource_list(struct gl_shader_program *shProg)
build_program_resource_list(struct gl_context *ctx,
struct gl_shader_program *shProg)
{
/* Rebuild resource list. */
if (shProg->ProgramResourceList) {
@@ -3820,6 +3911,17 @@ build_program_resource_list(struct gl_shader_program *shProg)
}
}
/* Add transform feedback buffers. */
for (unsigned i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) {
if ((shProg->LinkedTransformFeedback.ActiveBuffers >> i) & 1) {
shProg->LinkedTransformFeedback.Buffers[i].Binding = i;
if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_BUFFER,
&shProg->LinkedTransformFeedback.Buffers[i],
0))
return;
}
}
/* Add uniforms from uniform storage. */
for (unsigned i = 0; i < shProg->NumUniformStorage; i++) {
/* Do not add uniforms internally used by Mesa. */
@@ -4006,20 +4108,22 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
static void
split_ubos_and_ssbos(void *mem_ctx,
struct gl_uniform_block *blocks,
struct gl_uniform_block **s_blks,
struct gl_uniform_block *p_blks,
unsigned num_blocks,
struct gl_uniform_block ***ubos,
unsigned *num_ubos,
unsigned **ubo_interface_block_indices,
struct gl_uniform_block ***ssbos,
unsigned *num_ssbos,
unsigned **ssbo_interface_block_indices)
unsigned *num_ssbos)
{
unsigned num_ubo_blocks = 0;
unsigned num_ssbo_blocks = 0;
/* Are we splitting the list of blocks for the shader or the program? */
bool is_shader = p_blks == NULL;
for (unsigned i = 0; i < num_blocks; i++) {
if (blocks[i].IsShaderStorage)
if (is_shader ? s_blks[i]->IsShaderStorage : p_blks[i].IsShaderStorage)
num_ssbo_blocks++;
else
num_ubo_blocks++;
@@ -4031,24 +4135,13 @@ split_ubos_and_ssbos(void *mem_ctx,
*ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
*num_ssbos = 0;
if (ubo_interface_block_indices)
*ubo_interface_block_indices =
ralloc_array(mem_ctx, unsigned, num_ubo_blocks);
if (ssbo_interface_block_indices)
*ssbo_interface_block_indices =
ralloc_array(mem_ctx, unsigned, num_ssbo_blocks);
for (unsigned i = 0; i < num_blocks; i++) {
if (blocks[i].IsShaderStorage) {
(*ssbos)[*num_ssbos] = &blocks[i];
if (ssbo_interface_block_indices)
(*ssbo_interface_block_indices)[*num_ssbos] = i;
struct gl_uniform_block *blk = is_shader ? s_blks[i] : &p_blks[i];
if (blk->IsShaderStorage) {
(*ssbos)[*num_ssbos] = blk;
(*num_ssbos)++;
} else {
(*ubos)[*num_ubos] = &blocks[i];
if (ubo_interface_block_indices)
(*ubo_interface_block_indices)[*num_ubos] = i;
(*ubos)[*num_ubos] = blk;
(*num_ubos)++;
}
}
@@ -4153,9 +4246,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
return;
}
tfeedback_decl *tfeedback_decls = NULL;
unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
unsigned num_tfeedback_decls = 0;
unsigned int num_explicit_uniform_locs = 0;
bool has_xfb_qualifiers = false;
char **varying_names = NULL;
tfeedback_decl *tfeedback_decls = NULL;
void *mem_ctx = ralloc_context(NULL); // temporary linker context
@@ -4465,6 +4560,30 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
goto done;
}
/* From the ARB_enhanced_layouts spec:
*
* "If the shader used to record output variables for transform feedback
* varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
* qualifiers, the values specified by TransformFeedbackVaryings are
* ignored, and the set of variables captured for transform feedback is
* instead derived from the specified layout qualifiers."
*/
for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
/* Find last stage before fragment shader */
if (prog->_LinkedShaders[i]) {
has_xfb_qualifiers =
process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
&num_tfeedback_decls,
&varying_names);
break;
}
}
if (!has_xfb_qualifiers) {
num_tfeedback_decls = prog->TransformFeedback.NumVarying;
varying_names = prog->TransformFeedback.VaryingNames;
}
if (num_tfeedback_decls != 0) {
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
@@ -4481,10 +4600,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
tfeedback_decls = ralloc_array(mem_ctx, tfeedback_decl,
prog->TransformFeedback.NumVarying);
num_tfeedback_decls);
if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
prog->TransformFeedback.VaryingNames,
tfeedback_decls))
varying_names, tfeedback_decls))
goto done;
}
@@ -4564,7 +4682,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls))
if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
has_xfb_qualifiers))
goto done;
update_array_sizes(prog);
@@ -4627,25 +4746,23 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
gl_shader *sh = prog->_LinkedShaders[i];
split_ubos_and_ssbos(sh,
sh->BufferInterfaceBlocks,
NULL,
sh->NumBufferInterfaceBlocks,
&sh->UniformBlocks,
&sh->NumUniformBlocks,
NULL,
&sh->ShaderStorageBlocks,
&sh->NumShaderStorageBlocks,
NULL);
&sh->NumShaderStorageBlocks);
}
}
split_ubos_and_ssbos(prog,
NULL,
prog->BufferInterfaceBlocks,
prog->NumBufferInterfaceBlocks,
&prog->UniformBlocks,
&prog->NumUniformBlocks,
&prog->UboInterfaceBlockIndex,
&prog->ShaderStorageBlocks,
&prog->NumShaderStorageBlocks,
&prog->SsboInterfaceBlockIndex);
&prog->NumShaderStorageBlocks);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
+2 -1
View File
@@ -197,7 +197,8 @@ private:
void recursion(const glsl_type *t, char **name, size_t name_length,
bool row_major, const glsl_type *record_type,
const unsigned packing,
bool last_field, unsigned record_array_count);
bool last_field, unsigned record_array_count,
const glsl_struct_field *named_ifc_member);
};
void
@@ -169,7 +169,6 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new(mem_ctx) ir_variable(iface_t->fields.structure[i].type,
var_name,
(ir_variable_mode) var->data.mode);
new_var->data.from_named_ifc_block_nonarray = 1;
} else {
const glsl_type *new_array_type =
process_array_type(var->type, i);
@@ -177,10 +176,16 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new(mem_ctx) ir_variable(new_array_type,
var_name,
(ir_variable_mode) var->data.mode);
new_var->data.from_named_ifc_block_array = 1;
}
new_var->data.location = iface_t->fields.structure[i].location;
new_var->data.explicit_location = (new_var->data.location >= 0);
new_var->data.offset = iface_t->fields.structure[i].offset;
new_var->data.explicit_xfb_offset =
(iface_t->fields.structure[i].offset >= 0);
new_var->data.xfb_buffer =
iface_t->fields.structure[i].xfb_buffer;
new_var->data.explicit_xfb_buffer =
iface_t->fields.structure[i].explicit_xfb_buffer;
new_var->data.interpolation =
iface_t->fields.structure[i].interpolation;
new_var->data.centroid = iface_t->fields.structure[i].centroid;
@@ -188,8 +193,9 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new_var->data.patch = iface_t->fields.structure[i].patch;
new_var->data.stream = var->data.stream;
new_var->data.how_declared = var->data.how_declared;
new_var->data.from_named_ifc_block = 1;
new_var->init_interface_type(iface_t);
new_var->init_interface_type(var->type);
hash_table_insert(interface_namespace, new_var,
iface_field_name);
insert_pos->insert_after(new_var);
@@ -211,12 +217,23 @@ ir_visitor_status
flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir)
{
   ir_dereference_record *const record_lhs = ir->lhs->as_dereference_record();

   /* Flag a write to a variable that carries an interface type. */
   ir_variable *const written_var = ir->lhs->variable_referenced();
   if (written_var != NULL && written_var->get_interface_type() != NULL)
      written_var->data.assigned = 1;

   /* Only record dereferences on the LHS need any rewriting. */
   if (record_lhs == NULL)
      return rvalue_visit(ir);

   /* Let handle_rvalue() substitute the dereference and install the
    * replacement as the new LHS if it changed.
    */
   ir_rvalue *rewritten_lhs = record_lhs;
   handle_rvalue(&rewritten_lhs);
   if (rewritten_lhs != record_lhs)
      ir->set_lhs(rewritten_lhs);

   /* Whatever variable the (possibly rewritten) LHS refers to is written. */
   ir_variable *const rewritten_var = rewritten_lhs->variable_referenced();
   if (rewritten_var != NULL)
      rewritten_var->data.assigned = 1;

   return rvalue_visit(ir);
}
+2 -1
View File
@@ -43,7 +43,8 @@ extern void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
extern void
build_program_resource_list(struct gl_shader_program *shProg);
build_program_resource_list(struct gl_context *ctx,
struct gl_shader_program *shProg);
extern void
linker_error(struct gl_shader_program *prog, const char *fmt, ...)
@@ -130,11 +130,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
shProg->InterfaceBlockStageIndex[i] = NULL;
}
ralloc_free(shProg->UboInterfaceBlockIndex);
shProg->UboInterfaceBlockIndex = NULL;
ralloc_free(shProg->SsboInterfaceBlockIndex);
shProg->SsboInterfaceBlockIndex = NULL;
ralloc_free(shProg->AtomicBuffers);
shProg->AtomicBuffers = NULL;
shProg->NumAtomicBuffers = 0;
@@ -115,7 +115,6 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage,
prog->UniformStorage[index_to_set].name = (char *) name;
prog->UniformStorage[index_to_set].type = type;
prog->UniformStorage[index_to_set].array_elements = array_size;
prog->UniformStorage[index_to_set].initialized = false;
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
prog->UniformStorage[index_to_set].opaque[sh].index = ~0;
prog->UniformStorage[index_to_set].opaque[sh].active = false;
@@ -136,7 +135,6 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage,
prog->UniformStorage[i].name = (char *) "invalid slot";
prog->UniformStorage[i].type = glsl_type::void_type;
prog->UniformStorage[i].array_elements = 0;
prog->UniformStorage[i].initialized = false;
for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
prog->UniformStorage[i].opaque[sh].index = ~0;
prog->UniformStorage[i].opaque[sh].active = false;
@@ -149,21 +147,6 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage,
return red_zone_components;
}
/**
* Verify that the correct uniform is marked as having been initialized.
*/
static void
verify_initialization(struct gl_shader_program *prog, unsigned actual_index)
{
for (unsigned i = 0; i < prog->NumUniformStorage; i++) {
if (i == actual_index) {
EXPECT_TRUE(prog->UniformStorage[actual_index].initialized);
} else {
EXPECT_FALSE(prog->UniformStorage[i].initialized);
}
}
}
static void
non_array_test(void *mem_ctx, struct gl_shader_program *prog,
unsigned actual_index, const char *name,
@@ -181,7 +164,6 @@ non_array_test(void *mem_ctx, struct gl_shader_program *prog,
linker::set_uniform_initializer(mem_ctx, prog, name, type, val, 0xF00F);
verify_initialization(prog, actual_index);
verify_data(prog->UniformStorage[actual_index].storage, 0, val,
red_zone_components, 0xF00F);
}
@@ -338,7 +320,6 @@ array_test(void *mem_ctx, struct gl_shader_program *prog,
linker::set_uniform_initializer(mem_ctx, prog, name, element_type, val,
0xF00F);
verify_initialization(prog, actual_index);
verify_data(prog->UniformStorage[actual_index].storage, array_size,
val, red_zone_components, 0xF00F);
}
+49
View File
@@ -132,6 +132,10 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].image_volatile = fields[i].image_volatile;
this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
this->fields.structure[i].explicit_xfb_buffer =
fields[i].explicit_xfb_buffer;
this->fields.structure[i].xfb_buffer = fields[i].xfb_buffer;
this->fields.structure[i].xfb_stride = fields[i].xfb_stride;
}
mtx_unlock(&glsl_type::mutex);
@@ -172,6 +176,10 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].image_volatile = fields[i].image_volatile;
this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
this->fields.structure[i].explicit_xfb_buffer =
fields[i].explicit_xfb_buffer;
this->fields.structure[i].xfb_buffer = fields[i].xfb_buffer;
this->fields.structure[i].xfb_stride = fields[i].xfb_stride;
}
mtx_unlock(&glsl_type::mutex);
@@ -915,6 +923,15 @@ glsl_type::record_compare(const glsl_type *b) const
if (this->fields.structure[i].precision
!= b->fields.structure[i].precision)
return false;
if (this->fields.structure[i].explicit_xfb_buffer
!= b->fields.structure[i].explicit_xfb_buffer)
return false;
if (this->fields.structure[i].xfb_buffer
!= b->fields.structure[i].xfb_buffer)
return false;
if (this->fields.structure[i].xfb_stride
!= b->fields.structure[i].xfb_stride)
return false;
}
return true;
@@ -1333,6 +1350,38 @@ glsl_type::uniform_locations() const
}
}
/**
 * Count the varyings contained in this type.
 *
 * Scalar, vector and matrix base types count as one.  Structs and
 * interfaces sum the counts of their fields.  Arrays multiply the element
 * count by the array length only when the element is itself an array or the
 * array ultimately holds a record or interface; the innermost array of a
 * basic type counts as a single varying.
 */
unsigned
glsl_type::varying_count() const
{
   switch (this->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_DOUBLE:
   case GLSL_TYPE_BOOL:
      return 1;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE: {
      unsigned total = 0;
      for (unsigned field = 0; field < this->length; field++)
         total += this->fields.structure[field].type->varying_count();
      return total;
   }

   case GLSL_TYPE_ARRAY: {
      const unsigned per_element = this->fields.array->varying_count();

      /* Don't count innermost array elements */
      const bool count_each_element =
         this->without_array()->is_record() ||
         this->without_array()->is_interface() ||
         this->fields.array->is_array();

      return count_each_element ? this->length * per_element : per_element;
   }

   default:
      assert(!"unsupported varying type");
      return 0;
   }
}
bool
glsl_type::can_implicitly_convert_to(const glsl_type *desired,
_mesa_glsl_parse_state *state) const
+27 -2
View File
@@ -326,6 +326,12 @@ struct glsl_type {
*/
unsigned uniform_locations() const;
/**
* Used to count the number of varyings contained in the type ignoring
* innermost array elements.
*/
unsigned varying_count() const;
/**
* Calculate the number of attribute slots required to hold this type
*
@@ -839,12 +845,24 @@ struct glsl_struct_field {
/**
* For interface blocks, members may have an explicit byte offset
* specified; -1 otherwise.
* specified; -1 otherwise. Also used for xfb_offset layout qualifier.
*
* Ignored for structs.
* Unless used for xfb_offset this field is ignored for structs.
*/
int offset;
/**
* For interface blocks, members may define a transform feedback buffer;
* -1 otherwise.
*/
int xfb_buffer;
/**
* For interface blocks, members may define a transform feedback stride;
* -1 otherwise.
*/
int xfb_stride;
/**
* For interface blocks, the interpolation mode (as in
* ir_variable::interpolation). 0 otherwise.
@@ -889,6 +907,13 @@ struct glsl_struct_field {
unsigned image_volatile:1;
unsigned image_restrict:1;
/**
* Any of the xfb_* qualifiers trigger the shader to be in transform
* feedback mode so we need to keep track of whether the buffer was
* explicitly set or if it's just been assigned the default global value.
*/
unsigned explicit_xfb_buffer:1;
#ifdef __cplusplus
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
+2 -2
View File
@@ -22,10 +22,10 @@ NIR_FILES = \
nir_gather_info.c \
nir_gs_count_vertices.c \
nir_inline_functions.c \
nir_intrinsics.c \
nir_intrinsics.h \
nir_instr_set.c \
nir_instr_set.h \
nir_intrinsics.c \
nir_intrinsics.h \
nir_liveness.c \
nir_lower_alu_to_scalar.c \
nir_lower_atomics.c \
+1 -10
View File
@@ -143,16 +143,7 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
v2.run(sh->ir);
visit_exec_list(sh->ir, &v1);
nir_function *main = NULL;
nir_foreach_function(shader, func) {
if (strcmp(func->name, "main") == 0) {
main = func;
break;
}
}
assert(main);
nir_lower_outputs_to_temporaries(shader, main);
nir_lower_outputs_to_temporaries(shader, nir_shader_get_entrypoint(shader));
shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
+2
View File
@@ -1822,6 +1822,8 @@ nir_shader_get_entrypoint(nir_shader *shader)
assert(exec_list_length(&shader->functions) == 1);
struct exec_node *func_node = exec_list_get_head(&shader->functions);
nir_function *func = exec_node_data(nir_function, func_node, node);
assert(func->return_type == glsl_void_type());
assert(func->num_params == 0);
return func;
}
+5
View File
@@ -127,6 +127,7 @@ optimizations = [
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
(('bcsel', a, True, 'b@bool'), ('ior', a, b)),
(('fmin', a, a), a),
(('fmax', a, a), a),
(('imin', a, a), a),
@@ -270,6 +271,10 @@ optimizations = [
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
# Propagate negation up multiplication chains
(('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
# Misc. lowering
(('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
(('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
+1 -1
View File
@@ -31,7 +31,7 @@ extern "C" {
#endif
/**
* Shader stages. Note that these will become 5 with tessellation.
* Shader stages.
*
* The order must match how shaders are ordered in the pipeline.
* The GLSL linker assumes that if i<j, then the j-th shader is
+4
View File
@@ -537,6 +537,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
EGLint config_attrs[] = {
EGL_NATIVE_VISUAL_ID, 0,
EGL_NATIVE_VISUAL_TYPE, 0,
EGL_FRAMEBUFFER_TARGET_ANDROID, EGL_TRUE,
EGL_RECORDABLE_ANDROID, EGL_TRUE,
EGL_NONE
};
int count, i, j;
@@ -714,7 +716,9 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
goto cleanup_screen;
}
dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
dpy->Extensions.ANDROID_recordable = EGL_TRUE;
dpy->Extensions.KHR_image_base = EGL_TRUE;
/* Fill vtbl last to prevent accidentally calling virtual function during
+2
View File
@@ -381,7 +381,9 @@ _eglCreateExtensionsString(_EGLDisplay *dpy)
char *exts = dpy->ExtensionsString;
/* Please keep these sorted alphabetically. */
_EGL_CHECK_EXTENSION(ANDROID_framebuffer_target);
_EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
_EGL_CHECK_EXTENSION(ANDROID_recordable);
_EGL_CHECK_EXTENSION(CHROMIUM_sync_control);
+11 -1
View File
@@ -245,7 +245,13 @@ static const struct {
/* extensions */
{ EGL_Y_INVERTED_NOK, ATTRIB_TYPE_BOOLEAN,
ATTRIB_CRITERION_EXACT,
EGL_DONT_CARE }
EGL_DONT_CARE },
{ EGL_FRAMEBUFFER_TARGET_ANDROID, ATTRIB_TYPE_BOOLEAN,
ATTRIB_CRITERION_EXACT,
EGL_DONT_CARE },
{ EGL_RECORDABLE_ANDROID, ATTRIB_TYPE_BOOLEAN,
ATTRIB_CRITERION_EXACT,
EGL_DONT_CARE },
};
@@ -488,6 +494,10 @@ _eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr)
switch (attr) {
case EGL_Y_INVERTED_NOK:
return conf->Display->Extensions.NOK_texture_from_pixmap;
case EGL_FRAMEBUFFER_TARGET_ANDROID:
return conf->Display->Extensions.ANDROID_framebuffer_target;
case EGL_RECORDABLE_ANDROID:
return conf->Display->Extensions.ANDROID_recordable;
default:
break;
}
+4
View File
@@ -86,6 +86,8 @@ struct _egl_config
/* extensions */
EGLint YInvertedNOK;
EGLint FramebufferTargetAndroid;
EGLint RecordableAndroid;
};
@@ -133,6 +135,8 @@ _eglOffsetOfConfig(EGLint attr)
ATTRIB_MAP(EGL_CONFORMANT, Conformant);
/* extensions */
ATTRIB_MAP(EGL_Y_INVERTED_NOK, YInvertedNOK);
ATTRIB_MAP(EGL_FRAMEBUFFER_TARGET_ANDROID, FramebufferTargetAndroid);
ATTRIB_MAP(EGL_RECORDABLE_ANDROID, RecordableAndroid);
#undef ATTRIB_MAP
default:
return -1;
+2
View File
@@ -90,7 +90,9 @@ struct _egl_resource
struct _egl_extensions
{
/* Please keep these sorted alphabetically. */
EGLBoolean ANDROID_framebuffer_target;
EGLBoolean ANDROID_image_native_buffer;
EGLBoolean ANDROID_recordable;
EGLBoolean CHROMIUM_sync_control;
+18
View File
@@ -731,6 +731,24 @@ draw_texture_sampler(struct draw_context *draw,
}
}
/**
 * Attach a TGSI image object to the draw module's vertex or geometry
 * shader state, so the TGSI interpreter can hand it to shaders of that
 * stage when they are bound.
 *
 * \param draw    draw context whose per-stage TGSI state is updated
 * \param shader  PIPE_SHADER_VERTEX or PIPE_SHADER_GEOMETRY; any other
 *                value trips a debug assertion and is then treated as
 *                the geometry stage
 * \param image   image interface to store (the pointer is stored as is)
 */
void
draw_image(struct draw_context *draw,
           uint shader,
           struct tgsi_image *image)
{
   if (shader != PIPE_SHADER_VERTEX) {
      /* Only the vertex and geometry stages are handled here. */
      debug_assert(shader == PIPE_SHADER_GEOMETRY);
      draw->gs.tgsi.image = image;
      return;
   }

   draw->vs.tgsi.image = image;
}
@@ -48,6 +48,7 @@ struct draw_vertex_shader;
struct draw_geometry_shader;
struct draw_fragment_shader;
struct tgsi_sampler;
struct tgsi_image;
/*
* structure to contain driver internal information
@@ -154,6 +155,11 @@ draw_texture_sampler(struct draw_context *draw,
uint shader_type,
struct tgsi_sampler *sampler);
void
draw_image(struct draw_context *draw,
uint shader_type,
struct tgsi_image *image);
void
draw_set_sampler_views(struct draw_context *draw,
unsigned shader_stage,
+1 -1
View File
@@ -681,7 +681,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(shader->machine,
shader->state.tokens,
draw->gs.tgsi.sampler);
draw->gs.tgsi.sampler, draw->gs.tgsi.image);
}
}
@@ -66,6 +66,7 @@ struct draw_stage;
struct vbuf_render;
struct tgsi_exec_machine;
struct tgsi_sampler;
struct tgsi_image;
struct draw_pt_front_end;
struct draw_assembler;
struct draw_llvm;
@@ -267,6 +268,7 @@ struct draw_context
struct tgsi_exec_machine *machine;
struct tgsi_sampler *sampler;
struct tgsi_image *image;
} tgsi;
struct translate *fetch;
@@ -286,6 +288,7 @@ struct draw_context
struct tgsi_exec_machine *machine;
struct tgsi_sampler *sampler;
struct tgsi_image *image;
} tgsi;
} gs;
+1 -1
View File
@@ -70,7 +70,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
if (evs->machine->Tokens != shader->state.tokens) {
tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
draw->vs.tgsi.sampler);
draw->vs.tgsi.sampler, draw->vs.tgsi.image);
}
}
@@ -128,7 +128,7 @@ lp_debug_dump_value(LLVMValueRef value)
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
static size_t
disassemble(const void* func, std::stringstream &buffer)
disassemble(const void* func, std::ostream &buffer)
{
const uint8_t *bytes = (const uint8_t *)func;
@@ -235,15 +235,16 @@ disassemble(const void* func, std::stringstream &buffer)
extern "C" void
lp_disassemble(LLVMValueRef func, const void *code) {
std::stringstream buffer;
lp_disassemble(LLVMValueRef func, const void *code)
{
std::ostringstream buffer;
std::string s;
buffer << LLVMGetValueName(func) << ":\n";
disassemble(code, buffer);
s = buffer.str();
_debug_printf("%s", s.c_str());
_debug_printf("\n");
os_log_message(s.c_str());
os_log_message("\n");
}
@@ -259,7 +260,6 @@ extern "C" void
lp_profile(LLVMValueRef func, const void *code)
{
#if defined(__linux__) && defined(PROFILE)
std::stringstream buffer;
static std::ofstream perf_asm_file;
static boolean first_time = TRUE;
static FILE *perf_map_file = NULL;
@@ -283,9 +283,9 @@ lp_profile(LLVMValueRef func, const void *code)
if (perf_map_file) {
const char *symbol = LLVMGetValueName(func);
unsigned long addr = (uintptr_t)code;
buffer << symbol << ":\n";
unsigned long size = disassemble(code, buffer);
perf_asm_file << buffer.rdbuf() << std::flush;
perf_asm_file << symbol << ":\n";
unsigned long size = disassemble(code, perf_asm_file);
perf_asm_file.flush();
fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
fflush(perf_map_file);
}
+5 -3
View File
@@ -314,11 +314,13 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
res = LLVMBuildSelect(builder, mask, a, b, "");
}
else if (0) {
else if (HAVE_LLVM >= 0x0303) {
/* Generate a vector select.
*
* XXX: Using vector selects would avoid emitting intrinsics, but they aren't
* properly supported yet.
* Using vector selects would avoid emitting intrinsics, but they weren't
* properly supported yet for a long time.
*
* LLVM 3.3 appears to reliably support it.
*
* LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
*
@@ -108,14 +108,14 @@ struct fenced_manager
*/
struct fenced_buffer
{
/*
/**
* Immutable members.
*/
struct pb_buffer base;
struct fenced_manager *mgr;
/*
/**
* Following members are mutable and protected by fenced_manager::mutex.
*/
@@ -205,7 +205,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
curr = fenced_mgr->unfenced.next;
next = curr->next;
while(curr != &fenced_mgr->unfenced) {
while (curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
debug_printf("%10p %7u %8u %7s\n",
@@ -219,7 +219,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
curr = fenced_mgr->fenced.next;
next = curr->next;
while(curr != &fenced_mgr->fenced) {
while (curr != &fenced_mgr->fenced) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
@@ -340,7 +340,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
assert(pipe_is_referenced(&fenced_buf->base.reference));
assert(fenced_buf->fence);
if(fenced_buf->fence) {
if (fenced_buf->fence) {
struct pipe_fence_handle *fence = NULL;
int finished;
boolean proceed;
@@ -355,8 +355,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
assert(pipe_is_referenced(&fenced_buf->base.reference));
/*
* Only proceed if the fence object didn't change in the meanwhile.
/* Only proceed if the fence object didn't change in the meanwhile.
* Otherwise assume the work has been already carried out by another
* thread that re-acquired the lock before us.
*/
@@ -364,14 +363,9 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
ops->fence_reference(ops, &fence, NULL);
if(proceed && finished == 0) {
/*
* Remove from the fenced list
*/
boolean destroyed;
destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
if (proceed && finished == 0) {
/* Remove from the fenced list. */
boolean destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
/* TODO: remove consequents buffers with the same fence? */
@@ -405,36 +399,33 @@ fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr,
curr = fenced_mgr->fenced.next;
next = curr->next;
while(curr != &fenced_mgr->fenced) {
while (curr != &fenced_mgr->fenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
if(fenced_buf->fence != prev_fence) {
int signaled;
if (fenced_buf->fence != prev_fence) {
int signaled;
if (wait) {
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
if (wait) {
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
/*
* Don't return just now. Instead preemptively check if the
* following buffers' fences already expired, without further waits.
*/
wait = FALSE;
}
else {
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
}
if (signaled != 0) {
return ret;
/* Don't return just now. Instead preemptively check if the
* following buffers' fences already expired, without further waits.
*/
wait = FALSE;
} else {
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
}
prev_fence = fenced_buf->fence;
}
else {
if (signaled != 0) {
return ret;
}
prev_fence = fenced_buf->fence;
} else {
/* This buffer's fence object is identical to the previous buffer's
* fence object, so no need to check the fence again.
*/
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
@@ -462,22 +453,21 @@ fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr)
curr = fenced_mgr->unfenced.next;
next = curr->next;
while(curr != &fenced_mgr->unfenced) {
while (curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
/*
* We can only move storage if the buffer is not mapped and not
/* We can only move storage if the buffer is not mapped and not
* validated.
*/
if(fenced_buf->buffer &&
if (fenced_buf->buffer &&
!fenced_buf->mapcount &&
!fenced_buf->vl) {
enum pipe_error ret;
ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
if(ret == PIPE_OK) {
if (ret == PIPE_OK) {
ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf);
if(ret == PIPE_OK) {
if (ret == PIPE_OK) {
fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
return TRUE;
}
@@ -499,7 +489,7 @@ fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr)
static void
fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf)
{
if(fenced_buf->data) {
if (fenced_buf->data) {
align_free(fenced_buf->data);
fenced_buf->data = NULL;
assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size);
@@ -516,14 +506,14 @@ fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
assert(!fenced_buf->data);
if(fenced_buf->data)
if (fenced_buf->data)
return PIPE_OK;
if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size)
return PIPE_ERROR_OUT_OF_MEMORY;
fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment);
if(!fenced_buf->data)
if (!fenced_buf->data)
return PIPE_ERROR_OUT_OF_MEMORY;
fenced_mgr->cpu_total_size += fenced_buf->size;
@@ -538,7 +528,7 @@ fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr,
static void
fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf)
{
if(fenced_buf->buffer) {
if (fenced_buf->buffer) {
pb_reference(&fenced_buf->buffer, NULL);
}
}
@@ -575,41 +565,37 @@ fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
{
assert(!fenced_buf->buffer);
/*
* Check for signaled buffers before trying to allocate.
*/
/* Check for signaled buffers before trying to allocate. */
fenced_manager_check_signalled_locked(fenced_mgr, FALSE);
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
/*
* Keep trying while there is some sort of progress:
/* Keep trying while there is some sort of progress:
* - fences are expiring,
* - or buffers are being swapped out from GPU memory into CPU memory.
*/
while(!fenced_buf->buffer &&
while (!fenced_buf->buffer &&
(fenced_manager_check_signalled_locked(fenced_mgr, FALSE) ||
fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
}
if(!fenced_buf->buffer && wait) {
/*
* Same as before, but this time around, wait to free buffers if
if (!fenced_buf->buffer && wait) {
/* Same as before, but this time around, wait to free buffers if
* necessary.
*/
while(!fenced_buf->buffer &&
while (!fenced_buf->buffer &&
(fenced_manager_check_signalled_locked(fenced_mgr, TRUE) ||
fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
}
}
if(!fenced_buf->buffer) {
if(0)
if (!fenced_buf->buffer) {
if (0)
fenced_manager_dump_locked(fenced_mgr);
/* give up */
/* Give up. */
return PIPE_ERROR_OUT_OF_MEMORY;
}
@@ -686,18 +672,16 @@ fenced_buffer_map(struct pb_buffer *buf,
assert(!(flags & PB_USAGE_GPU_READ_WRITE));
/*
* Serialize writes.
*/
while((fenced_buf->flags & PB_USAGE_GPU_WRITE) ||
/* Serialize writes. */
while ((fenced_buf->flags & PB_USAGE_GPU_WRITE) ||
((fenced_buf->flags & PB_USAGE_GPU_READ) &&
(flags & PB_USAGE_CPU_WRITE))) {
/*
* Don't wait for the GPU to finish accessing it, if blocking is forbidden.
/* Don't wait for the GPU to finish accessing it,
* if blocking is forbidden.
*/
if((flags & PB_USAGE_DONTBLOCK) &&
ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
if ((flags & PB_USAGE_DONTBLOCK) &&
ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
goto done;
}
@@ -705,17 +689,15 @@ fenced_buffer_map(struct pb_buffer *buf,
break;
}
/*
* Wait for the GPU to finish accessing. This will release and re-acquire
/* Wait for the GPU to finish accessing. This will release and re-acquire
* the mutex, so all copies of mutable state must be discarded.
*/
fenced_buffer_finish_locked(fenced_mgr, fenced_buf);
}
if(fenced_buf->buffer) {
if (fenced_buf->buffer) {
map = pb_map(fenced_buf->buffer, flags, flush_ctx);
}
else {
} else {
assert(fenced_buf->data);
map = fenced_buf->data;
}
@@ -725,7 +707,7 @@ fenced_buffer_map(struct pb_buffer *buf,
fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE;
}
done:
done:
pipe_mutex_unlock(fenced_mgr->mutex);
return map;
@@ -741,12 +723,12 @@ fenced_buffer_unmap(struct pb_buffer *buf)
pipe_mutex_lock(fenced_mgr->mutex);
assert(fenced_buf->mapcount);
if(fenced_buf->mapcount) {
if (fenced_buf->mapcount) {
if (fenced_buf->buffer)
pb_unmap(fenced_buf->buffer);
--fenced_buf->mapcount;
if(!fenced_buf->mapcount)
fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE;
if (!fenced_buf->mapcount)
fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE;
}
pipe_mutex_unlock(fenced_mgr->mutex);
@@ -765,7 +747,7 @@ fenced_buffer_validate(struct pb_buffer *buf,
pipe_mutex_lock(fenced_mgr->mutex);
if (!vl) {
/* invalidate */
/* Invalidate. */
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
ret = PIPE_OK;
@@ -776,40 +758,37 @@ fenced_buffer_validate(struct pb_buffer *buf,
assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
flags &= PB_USAGE_GPU_READ_WRITE;
/* Buffer cannot be validated in two different lists */
if(fenced_buf->vl && fenced_buf->vl != vl) {
/* Buffer cannot be validated in two different lists. */
if (fenced_buf->vl && fenced_buf->vl != vl) {
ret = PIPE_ERROR_RETRY;
goto done;
}
if(fenced_buf->vl == vl &&
if (fenced_buf->vl == vl &&
(fenced_buf->validation_flags & flags) == flags) {
/* Nothing to do -- buffer already validated */
/* Nothing to do -- buffer already validated. */
ret = PIPE_OK;
goto done;
}
/*
* Create and update GPU storage.
*/
if(!fenced_buf->buffer) {
/* Create and update GPU storage. */
if (!fenced_buf->buffer) {
assert(!fenced_buf->mapcount);
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
if(ret != PIPE_OK) {
if (ret != PIPE_OK) {
goto done;
}
ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf);
if(ret != PIPE_OK) {
if (ret != PIPE_OK) {
fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
goto done;
}
if(fenced_buf->mapcount) {
if (fenced_buf->mapcount) {
debug_printf("warning: validating a buffer while it is still mapped\n");
}
else {
} else {
fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
}
}
@@ -821,7 +800,7 @@ fenced_buffer_validate(struct pb_buffer *buf,
fenced_buf->vl = vl;
fenced_buf->validation_flags |= flags;
done:
done:
pipe_mutex_unlock(fenced_mgr->mutex);
return ret;
@@ -841,13 +820,12 @@ fenced_buffer_fence(struct pb_buffer *buf,
assert(pipe_is_referenced(&fenced_buf->base.reference));
assert(fenced_buf->buffer);
if(fence != fenced_buf->fence) {
if (fence != fenced_buf->fence) {
assert(fenced_buf->vl);
assert(fenced_buf->validation_flags);
if (fenced_buf->fence) {
boolean destroyed;
destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
boolean destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
assert(!destroyed);
}
if (fence) {
@@ -876,16 +854,15 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
pipe_mutex_lock(fenced_mgr->mutex);
/*
* This should only be called when the buffer is validated. Typically
/* This should only be called when the buffer is validated. Typically
* when processing relocations.
*/
assert(fenced_buf->vl);
assert(fenced_buf->buffer);
if(fenced_buf->buffer)
if (fenced_buf->buffer) {
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
else {
} else {
*base_buf = buf;
*offset = 0;
}
@@ -896,12 +873,12 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
static const struct pb_vtbl
fenced_buffer_vtbl = {
fenced_buffer_destroy,
fenced_buffer_map,
fenced_buffer_unmap,
fenced_buffer_validate,
fenced_buffer_fence,
fenced_buffer_get_base_buffer
fenced_buffer_destroy,
fenced_buffer_map,
fenced_buffer_unmap,
fenced_buffer_validate,
fenced_buffer_fence,
fenced_buffer_get_base_buffer
};
@@ -917,12 +894,11 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr,
struct fenced_buffer *fenced_buf;
enum pipe_error ret;
/*
* Don't stall the GPU, waste time evicting buffers, or waste memory
/* Don't stall the GPU, waste time evicting buffers, or waste memory
* trying to create a buffer that will most likely never fit into the
* graphics aperture.
*/
if(size > fenced_mgr->max_buffer_size) {
if (size > fenced_mgr->max_buffer_size) {
goto no_buffer;
}
@@ -942,29 +918,21 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr,
pipe_mutex_lock(fenced_mgr->mutex);
/*
* Try to create GPU storage without stalling,
*/
/* Try to create GPU storage without stalling. */
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE);
/*
* Attempt to use CPU memory to avoid stalling the GPU.
*/
if(ret != PIPE_OK) {
/* Attempt to use CPU memory to avoid stalling the GPU. */
if (ret != PIPE_OK) {
ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
}
/*
* Create GPU storage, waiting for some to be available.
*/
if(ret != PIPE_OK) {
/* Create GPU storage, waiting for some to be available. */
if (ret != PIPE_OK) {
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
}
/*
* Give up.
*/
if(ret != PIPE_OK) {
/* Give up. */
if (ret != PIPE_OK) {
goto no_storage;
}
@@ -976,10 +944,10 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr,
return &fenced_buf->base;
no_storage:
no_storage:
pipe_mutex_unlock(fenced_mgr->mutex);
FREE(fenced_buf);
no_buffer:
no_buffer:
return NULL;
}
@@ -990,12 +958,12 @@ fenced_bufmgr_flush(struct pb_manager *mgr)
struct fenced_manager *fenced_mgr = fenced_manager(mgr);
pipe_mutex_lock(fenced_mgr->mutex);
while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
while (fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
;
pipe_mutex_unlock(fenced_mgr->mutex);
assert(fenced_mgr->provider->flush);
if(fenced_mgr->provider->flush)
if (fenced_mgr->provider->flush)
fenced_mgr->provider->flush(fenced_mgr->provider);
}
@@ -1007,25 +975,25 @@ fenced_bufmgr_destroy(struct pb_manager *mgr)
pipe_mutex_lock(fenced_mgr->mutex);
/* Wait on outstanding fences */
/* Wait on outstanding fences. */
while (fenced_mgr->num_fenced) {
pipe_mutex_unlock(fenced_mgr->mutex);
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
sched_yield();
#endif
pipe_mutex_lock(fenced_mgr->mutex);
while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
while (fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
;
}
#ifdef DEBUG
/*assert(!fenced_mgr->num_unfenced);*/
/* assert(!fenced_mgr->num_unfenced); */
#endif
pipe_mutex_unlock(fenced_mgr->mutex);
pipe_mutex_destroy(fenced_mgr->mutex);
if(fenced_mgr->provider)
if (fenced_mgr->provider)
fenced_mgr->provider->destroy(fenced_mgr->provider);
fenced_mgr->ops->destroy(fenced_mgr->ops);
+287 -7
View File
@@ -853,7 +853,8 @@ void
tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler)
struct tgsi_sampler *sampler,
struct tgsi_image *image)
{
uint k;
struct tgsi_parse_context parse;
@@ -871,6 +872,7 @@ tgsi_exec_machine_bind_shader(
mach->Tokens = tokens;
mach->Sampler = sampler;
mach->Image = image;
if (!tokens) {
/* unbind and free all */
@@ -1994,12 +1996,12 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
uint sampler)
{
uint unit;
uint unit = 0;
int i;
if (inst->Src[sampler].Register.Indirect) {
const struct tgsi_full_src_register *reg = &inst->Src[sampler];
union tgsi_exec_channel indir_index, index2;
const uint execmask = mach->ExecMask;
index2.i[0] =
index2.i[1] =
index2.i[2] =
@@ -2012,7 +2014,13 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
&index2,
&ZeroVec,
&indir_index);
unit = inst->Src[sampler].Register.Index + indir_index.i[0];
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
if (execmask & (1 << i)) {
unit = inst->Src[sampler].Register.Index + indir_index.i[i];
break;
}
}
} else {
unit = inst->Src[sampler].Register.Index;
}
@@ -2046,7 +2054,8 @@ exec_tex(struct tgsi_exec_machine *mach,
assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref);
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
assert(dim <= 4);
if (shadow_ref >= 0)
@@ -2145,7 +2154,7 @@ exec_lodq(struct tgsi_exec_machine *mach,
union tgsi_exec_channel r[2];
unit = fetch_sampler_unit(mach, inst, 1);
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
assert(dim <= Elements(coords));
/* fetch coordinates */
for (i = 0; i < dim; i++) {
@@ -3700,6 +3709,247 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
}
}
/**
 * Number of integer coordinate components used to address an image of
 * the given TGSI texture target.
 *
 * \param tgsi_tex  one of the TGSI_TEXTURE_* targets
 * \return 1, 2 or 3 for the known targets; 0 (after asserting in debug
 *         builds) for anything else
 */
static int
get_image_coord_dim(unsigned tgsi_tex)
{
   switch (tgsi_tex) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_1D:
      return 1;

   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_2D_MSAA:
      return 2;

   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   case TGSI_TEXTURE_CUBE_ARRAY:
      return 3;

   default:
      assert(!"unknown texture target");
      return 0;
   }
}
/**
 * Channel offset (relative to TGSI_CHAN_X) of the sample index within
 * the coordinate operand of an image instruction.
 *
 * \param tgsi_tex  one of the TGSI_TEXTURE_* targets
 * \return 0 for non-multisampled targets, which callers treat as
 *         "no sample index to fetch"; non-zero for the MSAA targets
 *
 * NOTE(review): the value 4 for 2D_ARRAY_MSAA is added to TGSI_CHAN_X by
 * the callers; if channels are numbered X..W = 0..3 this is past the last
 * channel -- confirm against the TGSI MSAA coordinate layout.
 */
static int
get_image_coord_sample(unsigned tgsi_tex)
{
   if (tgsi_tex == TGSI_TEXTURE_2D_MSAA)
      return 3;

   if (tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA)
      return 4;

   return 0;
}
static void
exec_load(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
uint unit;
int sample;
int i, j;
int dim;
uint chan;
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
struct tgsi_image_params params;
int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
unit = fetch_sampler_unit(mach, inst, 0);
dim = get_image_coord_dim(inst->Memory.Texture);
sample = get_image_coord_sample(inst->Memory.Texture);
assert(dim <= 3);
params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
params.unit = unit;
params.tgsi_tex_instr = inst->Memory.Texture;
params.format = inst->Memory.Format;
for (i = 0; i < dim; i++) {
IFETCH(&r[i], 1, TGSI_CHAN_X + i);
}
if (sample)
IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
mach->Image->load(mach->Image, &params,
r[0].i, r[1].i, r[2].i, sample_r.i,
rgba);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
r[0].f[j] = rgba[0][j];
r[1].f[j] = rgba[1][j];
r[2].f[j] = rgba[2][j];
r[3].f[j] = rgba[3][j];
}
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
}
}
}
static void
exec_store(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[3], sample_r;
union tgsi_exec_channel value[4];
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
struct tgsi_image_params params;
int dim;
int sample;
int i, j;
uint unit;
int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
unit = inst->Dst[0].Register.Index;
dim = get_image_coord_dim(inst->Memory.Texture);
sample = get_image_coord_sample(inst->Memory.Texture);
assert(dim <= 3);
params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
params.unit = unit;
params.tgsi_tex_instr = inst->Memory.Texture;
params.format = inst->Memory.Format;
for (i = 0; i < dim; i++) {
IFETCH(&r[i], 0, TGSI_CHAN_X + i);
}
for (i = 0; i < 4; i++) {
FETCH(&value[i], 1, TGSI_CHAN_X + i);
}
if (sample)
IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
rgba[0][j] = value[0].f[j];
rgba[1][j] = value[1].f[j];
rgba[2][j] = value[2].f[j];
rgba[3][j] = value[3].f[j];
}
mach->Image->store(mach->Image, &params,
r[0].i, r[1].i, r[2].i, sample_r.i,
rgba);
}
static void
exec_atomop(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
union tgsi_exec_channel r[4], sample_r;
union tgsi_exec_channel value[4], value2[4];
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
struct tgsi_image_params params;
int dim;
int sample;
int i, j;
uint unit, chan;
int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
unit = fetch_sampler_unit(mach, inst, 0);
dim = get_image_coord_dim(inst->Memory.Texture);
sample = get_image_coord_sample(inst->Memory.Texture);
assert(dim <= 3);
params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
params.unit = unit;
params.tgsi_tex_instr = inst->Memory.Texture;
params.format = inst->Memory.Format;
for (i = 0; i < dim; i++) {
IFETCH(&r[i], 1, TGSI_CHAN_X + i);
}
for (i = 0; i < 4; i++) {
FETCH(&value[i], 2, TGSI_CHAN_X + i);
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
FETCH(&value2[i], 3, TGSI_CHAN_X + i);
}
if (sample)
IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
rgba[0][j] = value[0].f[j];
rgba[1][j] = value[1].f[j];
rgba[2][j] = value[2].f[j];
rgba[3][j] = value[3].f[j];
}
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
rgba2[0][j] = value2[0].f[j];
rgba2[1][j] = value2[1].f[j];
rgba2[2][j] = value2[2].f[j];
rgba2[3][j] = value2[3].f[j];
}
}
mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
r[0].i, r[1].i, r[2].i, sample_r.i,
rgba, rgba2);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
r[0].f[j] = rgba[0][j];
r[1].f[j] = rgba[1][j];
r[2].f[j] = rgba[2][j];
r[3].f[j] = rgba[3][j];
}
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
}
}
}
static void
exec_resq(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
int result[4];
union tgsi_exec_channel r[4];
uint unit;
int i, chan, j;
struct tgsi_image_params params;
int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
unit = fetch_sampler_unit(mach, inst, 0);
params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
params.unit = unit;
params.tgsi_tex_instr = inst->Memory.Texture;
params.format = inst->Memory.Format;
mach->Image->get_dims(mach->Image, &params, result);
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
for (j = 0; j < 4; j++) {
r[j].i[i] = result[j];
}
}
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
TGSI_EXEC_DATA_INT);
}
}
}
static void
micro_i2f(union tgsi_exec_channel *dst,
@@ -5166,6 +5416,34 @@ exec_instruction(
case TGSI_OPCODE_D2U:
exec_d2u(mach, inst);
break;
case TGSI_OPCODE_LOAD:
exec_load(mach, inst);
break;
case TGSI_OPCODE_STORE:
exec_store(mach, inst);
break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
case TGSI_OPCODE_ATOMAND:
case TGSI_OPCODE_ATOMOR:
case TGSI_OPCODE_ATOMXOR:
case TGSI_OPCODE_ATOMUMIN:
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMIMAX:
exec_atomop(mach, inst);
break;
case TGSI_OPCODE_RESQ:
exec_resq(mach, inst);
break;
case TGSI_OPCODE_BARRIER:
case TGSI_OPCODE_MEMBAR:
break;
default:
assert( 0 );
}
@@ -5193,6 +5471,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
default_mask = 0x1;
}
if (mach->NonHelperMask == 0)
mach->NonHelperMask = default_mask;
mach->CondMask = default_mask;
mach->LoopMask = default_mask;
mach->ContMask = default_mask;
+52 -4
View File
@@ -98,6 +98,46 @@ enum tgsi_sampler_control
TGSI_SAMPLER_GATHER,
};
/**
 * Per-instruction parameters handed to every tgsi_image callback.
 * The TGSI executor fills all four fields before each call.
 */
struct tgsi_image_params {
/** Image unit the instruction addresses. */
unsigned unit;
/** Texture target taken from the instruction (a TGSI_TEXTURE_* value). */
unsigned tgsi_tex_instr;
/** Format taken from the instruction's memory token. */
enum pipe_format format;
/**
 * Mask of quad lanes the operation applies to.  The executor passes
 * ExecMask & NonHelperMask & ~kilmask, i.e. lanes that are neither
 * inactive, helper invocations nor killed.
 */
unsigned execmask;
};
/**
 * Interface through which the TGSI executor performs image operations.
 * The executor itself only marshals operands; the callbacks do the
 * actual access.
 *
 * For load/store/op, s/t/r are per-lane integer coordinates and `sample`
 * the per-lane sample index.  The executor only fetches as many
 * coordinate arrays as the texture target needs and only fetches
 * `sample` for multisampled targets, so the remaining arrays hold
 * unspecified data.
 */
struct tgsi_image {
/* image interfaces */
/** Read texels; results are returned in rgba, indexed [channel][lane]. */
void (*load)(const struct tgsi_image *image,
const struct tgsi_image_params *params,
const int s[TGSI_QUAD_SIZE],
const int t[TGSI_QUAD_SIZE],
const int r[TGSI_QUAD_SIZE],
const int sample[TGSI_QUAD_SIZE],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
/** Write the values in rgba, indexed [channel][lane]. */
void (*store)(const struct tgsi_image *image,
const struct tgsi_image_params *params,
const int s[TGSI_QUAD_SIZE],
const int t[TGSI_QUAD_SIZE],
const int r[TGSI_QUAD_SIZE],
const int sample[TGSI_QUAD_SIZE],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
/**
 * Atomic operation selected by `opcode` (the instruction's TGSI opcode).
 * rgba carries the operand in and the value the executor stores to the
 * destination register out.  rgba2 is filled by the executor only for
 * TGSI_OPCODE_ATOMCAS.
 */
void (*op)(const struct tgsi_image *image,
const struct tgsi_image_params *params,
unsigned opcode,
const int s[TGSI_QUAD_SIZE],
const int t[TGSI_QUAD_SIZE],
const int r[TGSI_QUAD_SIZE],
const int sample[TGSI_QUAD_SIZE],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
/**
 * Query the image's dimensions into dims.  The executor copies all four
 * entries to the destination register, so each should be written.
 */
void (*get_dims)(const struct tgsi_image *image,
const struct tgsi_image_params *params,
int dims[4]);
};
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
@@ -201,12 +241,13 @@ struct tgsi_sampler
#define TGSI_EXEC_NUM_TEMP_R 4
#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
#define TGSI_EXEC_NUM_ADDRS 3
/* predicate register */
#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9)
#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 11)
#define TGSI_EXEC_NUM_PREDS 1
#define TGSI_EXEC_NUM_TEMP_EXTRAS 10
#define TGSI_EXEC_NUM_TEMP_EXTRAS 12
@@ -292,6 +333,7 @@ struct tgsi_exec_machine
struct tgsi_sampler *Sampler;
struct tgsi_image *Image;
unsigned ImmLimit;
const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
@@ -311,6 +353,9 @@ struct tgsi_exec_machine
struct tgsi_exec_vector QuadPos;
float Face; /**< +1 if front facing, -1 if back facing */
bool flatshade_color;
/* See GLSL 4.50 specification for definition of helper invocations */
uint NonHelperMask; /**< non-helpers */
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
uint LoopMask; /**< For BGNLOOP/ENDLOOP */
@@ -378,7 +423,8 @@ void
tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
struct tgsi_sampler *sampler);
struct tgsi_sampler *sampler,
struct tgsi_image *image);
uint
tgsi_exec_machine_run(
@@ -451,8 +497,10 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return PIPE_MAX_SHADER_IMAGES;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
+53 -7
View File
@@ -54,6 +54,20 @@ is_memory_file(unsigned file)
}
/**
 * Tell whether an opcode is a "true" texture instruction, i.e. one that
 * samples from a texture map.
 *
 * The query opcodes (TXQ, TXQS, TXQ_LZ, LODQ) are rejected up front without
 * consulting the opcode table. Any other opcode counts as a texture
 * instruction exactly when its opcode-info entry has is_tex set.
 */
static bool
is_texture_inst(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_TXQ_LZ:
   case TGSI_OPCODE_LODQ:
      /* pure queries: nothing is sampled */
      return false;
   default:
      break;
   }

   return tgsi_get_opcode_info(opcode)->is_tex != 0;
}
static void
scan_instruction(struct tgsi_shader_info *info,
const struct tgsi_full_instruction *fullinst,
@@ -181,15 +195,35 @@ scan_instruction(struct tgsi_shader_info *info,
info->indirect_files_read |= (1 << src->Register.File);
}
/* MSAA samplers */
/* Texture samplers */
if (src->Register.File == TGSI_FILE_SAMPLER) {
assert(fullinst->Instruction.Texture);
assert(src->Register.Index < Elements(info->is_msaa_sampler));
const unsigned index = src->Register.Index;
if (fullinst->Instruction.Texture &&
(fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
info->is_msaa_sampler[src->Register.Index] = TRUE;
assert(fullinst->Instruction.Texture);
assert(index < Elements(info->is_msaa_sampler));
assert(index < PIPE_MAX_SAMPLERS);
if (is_texture_inst(fullinst->Instruction.Opcode)) {
const unsigned target = fullinst->Texture.Texture;
assert(target < TGSI_TEXTURE_UNKNOWN);
/* for texture instructions, check that the texture instruction
* target matches the previous sampler view declaration (if there
* was one.)
*/
if (info->sampler_targets[index] == TGSI_TEXTURE_UNKNOWN) {
/* probably no sampler view declaration */
info->sampler_targets[index] = target;
} else {
/* Make sure the texture instruction's sampler/target info
* agrees with the sampler view declaration.
*/
assert(info->sampler_targets[index] == target);
}
/* MSAA samplers */
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
info->is_msaa_sampler[src->Register.Index] = TRUE;
}
}
}
@@ -431,6 +465,16 @@ scan_declaration(struct tgsi_shader_info *info,
}
} else if (file == TGSI_FILE_SAMPLER) {
info->samplers_declared |= 1 << reg;
} else if (file == TGSI_FILE_SAMPLER_VIEW) {
unsigned target = fulldecl->SamplerView.Resource;
assert(target < TGSI_TEXTURE_UNKNOWN);
if (info->sampler_targets[reg] == TGSI_TEXTURE_UNKNOWN) {
/* Save sampler target for this sampler index */
info->sampler_targets[reg] = target;
} else {
/* if previously declared, make sure targets agree */
assert(info->sampler_targets[reg] == target);
}
} else if (file == TGSI_FILE_IMAGE) {
if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER)
info->images_buffers |= 1 << reg;
@@ -493,6 +537,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
for (i = 0; i < Elements(info->const_file_max); i++)
info->const_file_max[i] = -1;
info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1;
for (i = 0; i < Elements(info->sampler_targets); i++)
info->sampler_targets[i] = TGSI_TEXTURE_UNKNOWN;
/**
** Setup to begin parsing input shader
+1
View File
@@ -65,6 +65,7 @@ struct tgsi_shader_info
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
unsigned samplers_declared; /**< bitmask of declared samplers */
ubyte sampler_targets[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /**< TGSI_TEXTURE_x values */
ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
+31 -38
View File
@@ -375,10 +375,8 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg)
* sample index.
*/
int
tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
tgsi_util_get_texture_coord_dim(unsigned tgsi_tex)
{
int dim;
/*
* Depending on the texture target, (src0.xyzw, src1.x) is interpreted
* differently:
@@ -407,8 +405,7 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
dim = 1;
break;
return 1;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_1D_ARRAY:
@@ -416,52 +413,48 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_2D_MSAA:
dim = 2;
break;
return 2;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_2D_ARRAY_MSAA:
dim = 3;
break;
return 3;
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
dim = 4;
break;
return 4;
default:
assert(!"unknown texture target");
dim = 0;
break;
return 0;
}
}
if (shadow_or_sample) {
switch (tgsi_tex) {
case TGSI_TEXTURE_SHADOW1D:
/* there is a gap */
*shadow_or_sample = 2;
break;
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
*shadow_or_sample = dim;
break;
case TGSI_TEXTURE_2D_MSAA:
case TGSI_TEXTURE_2D_ARRAY_MSAA:
*shadow_or_sample = 3;
break;
default:
/* no shadow nor sample */
*shadow_or_sample = -1;
break;
}
/**
 * Given a TGSI_TEXTURE_x target, return the src register index for the
 * shadow reference coordinate.
 *
 * The two MSAA targets also yield a non-negative index (3). Going by the
 * "no shadow nor sample" fallback below, that is presumably where the sample
 * index lives -- TODO confirm against the callers.
 *
 * \param tgsi_tex  one of the TGSI_TEXTURE_x values
 * \return 2, 3 or 4 for the targets listed below, -1 for every other target
 */
int
tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex)
{
   switch (tgsi_tex) {
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return 2;
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return 3;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return 4;
   default:
      /* no shadow nor sample */
      return -1;
   }
   /* Every path through the switch returns, so nothing follows it. The
    * former trailing "return dim;" referenced a variable that does not
    * exist in this function and could never execute.
    */
}
+4 -1
View File
@@ -80,7 +80,10 @@ struct tgsi_src_register
tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
int
tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);
tgsi_util_get_texture_coord_dim(unsigned tgsi_tex);
int
tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex);
boolean
tgsi_is_shadow_target(unsigned target);
+5 -5
View File
@@ -55,16 +55,16 @@ util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
dst->height != src->height)
return FALSE;
for (i = 0; i < Elements(src->cbufs); i++) {
if (dst->nr_cbufs != src->nr_cbufs) {
return FALSE;
}
for (i = 0; i < src->nr_cbufs; i++) {
if (dst->cbufs[i] != src->cbufs[i]) {
return FALSE;
}
}
if (dst->nr_cbufs != src->nr_cbufs) {
return FALSE;
}
if (dst->zsbuf != src->zsbuf) {
return FALSE;
}
+1 -1
View File
@@ -2095,7 +2095,7 @@ after lookup.
.. opcode:: SAMPLE
Using provided address, sample data from the specified texture using the
filtering mode identified by the gven sampler. The source data may come from
filtering mode identified by the given sampler. The source data may come from
any resource type other than buffers.
Syntax: ``SAMPLE dst, address, sampler_view, sampler``
@@ -1109,7 +1109,7 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
default:
compile_error(ctx, "Unhandled store deref type: %u\n",
darr->deref_array_type);
break;
return;
}
for (int i = 0; i < intr->num_components; i++) {
@@ -1258,7 +1258,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
ctx->frag_face = create_input(b, 0);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
}
dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
/* for fragface, we always get -1 or 0, but that is inverse
* of what nir expects (where ~0 is true). Unfortunately
* trying to widen from half to full in add.s seems to do a
* non-sign-extending widen (resulting in something that
* gets interpreted as float Inf??)
*/
dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32);
dst[0] = ir3_ADD_S(b, dst[0], 0, create_immed(b, 1), 0);
break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
@@ -740,7 +740,9 @@ fs_prepare_tgsi_sampling(struct fs_compile_context *fcc,
break;
}
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target);
ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target);
tsrc_transpose(inst->src[0], coords);
bias_or_lod = tsrc_null();
ref_or_si = tsrc_null();
@@ -407,7 +407,8 @@ vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
num_derivs = 0;
sampler_src = 1;
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target);
ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target);
/* extract the parameters */
switch (inst->opcode) {
@@ -177,9 +177,11 @@ struct nv50_ir_prog_info
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
uint16_t bufInfoBase; /* base address for buffer info */
uint16_t sampleInfoBase; /* base address for sample positions */
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */
} io;
/* driver callback to assign input/output locations */
@@ -1858,7 +1858,10 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
assert(i->defExists(1));
defId(i->def(1), 32 + 18);
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
defId(i->def(1), 8);
else
defId(i->def(1), 32 + 18);
}
}
@@ -3536,8 +3536,11 @@ Converter::exportOutputs()
Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
info->out[i].slot[c] * 4);
Value *val = oData.load(sub.cur->values, i, c, NULL);
if (val)
if (val) {
if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
mkOp1(OP_SAT, TYPE_F32, val, val);
mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
}
}
}
}
@@ -874,7 +874,17 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
// This function is invoked after handleTEX lowering, so we have to expect
// the arguments in the order that the hw wants them. For Fermi, array and
// indirect are both in the leading arg, while for Kepler, array and
// indirect are separate (and both precede the coordinates). Maxwell is
// handled in a separate function.
unsigned array;
if (targ->getChipset() < NVISA_GK104_CHIPSET)
array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0;
else
array = i->tex.target.isArray() + (i->tex.rIndirectSrc >= 0);
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -1063,13 +1073,115 @@ bool
NVC0LoweringPass::handleSUQ(Instruction *suq)
{
suq->op = OP_MOV;
suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
suq->setSrc(0, loadBufLength32(suq->getIndirect(0, 1),
suq->getSrc(0)->reg.fileIndex * 16));
suq->setIndirect(0, 0, NULL);
suq->setIndirect(0, 1, NULL);
return true;
}
// Lower an atomic operation on shared memory into a load-locked /
// store-unlocked retry loop.
//
// The block containing 'atom' is split and two new blocks are created:
//   currBB         -> sets up the join point and a predicate, branches on
//   tryLockBB      -> locked load; on success go to setAndUnlockBB
//   setAndUnlockBB -> compute the new value, store + unlock
//   failLockBB     -> loop back to tryLockBB until the store went through
//   joinBB         -> code that followed the atomic
//
// The original 'atom' instruction is removed; its def(0) becomes the
// destination of the locked load.
//
// NOTE(review): both the locked load and the unlocking store use a fresh
// symbol at shared-memory offset 0 with a NULL indirect instead of the atom's
// own src(0)/indirect -- confirm the addressing is intentional.
void
NVC0LoweringPass::handleSharedATOMNVE4(Instruction *atom)
{
assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
BasicBlock *currBB = atom->bb;
BasicBlock *tryLockBB = atom->bb->splitBefore(atom, false);
BasicBlock *joinBB = atom->bb->splitAfter(atom);
BasicBlock *setAndUnlockBB = new BasicBlock(func);
BasicBlock *failLockBB = new BasicBlock(func);
bld.setPosition(currBB, true);
assert(!currBB->joinAt);
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
// Predicate built from comparing the immediates 0 and 1 for equality; its
// def is later also attached to the unlocking store and tested in
// failLockBB to decide whether to retry.
CmpInstruction *pred =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0), bld.mkImm(1));
bld.mkFlow(OP_BRA, tryLockBB, CC_ALWAYS, NULL);
currBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::TREE);
bld.setPosition(tryLockBB, true);
// Locked load: def(0) is the atom's result, def(1) is a predicate that is
// used below to pick between setAndUnlockBB and failLockBB.
Instruction *ld =
bld.mkLoad(TYPE_U32, atom->getDef(0),
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
bld.mkFlow(OP_BRA, setAndUnlockBB, CC_P, ld->getDef(1));
bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL);
tryLockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::CROSS);
tryLockBB->cfg.attach(&setAndUnlockBB->cfg, Graph::Edge::TREE);
// tryLockBB no longer flows straight into joinBB; drop that edge and the
// atomic itself (its operands are still read below to build the new value).
tryLockBB->cfg.detach(&joinBB->cfg);
bld.remove(atom);
bld.setPosition(setAndUnlockBB, true);
// stVal is the value that gets written back, chosen by the atomic sub-op.
Value *stVal;
if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
// Read the old value, and write the new one.
stVal = atom->getSrc(1);
} else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
// Compare the loaded value with src(1), then select between src(2) and the
// loaded value based on that comparison.
CmpInstruction *set =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(),
TYPE_U32, ld->getDef(0), atom->getSrc(1));
bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, (stVal = bld.getSSA()),
TYPE_U32, atom->getSrc(2), ld->getDef(0), set->getDef(0));
} else {
// Remaining sub-ops map onto a plain two-operand ALU op applied to the
// loaded value and src(1).
operation op;
switch (atom->subOp) {
case NV50_IR_SUBOP_ATOM_ADD:
op = OP_ADD;
break;
case NV50_IR_SUBOP_ATOM_AND:
op = OP_AND;
break;
case NV50_IR_SUBOP_ATOM_OR:
op = OP_OR;
break;
case NV50_IR_SUBOP_ATOM_XOR:
op = OP_XOR;
break;
case NV50_IR_SUBOP_ATOM_MIN:
op = OP_MIN;
break;
case NV50_IR_SUBOP_ATOM_MAX:
op = OP_MAX;
break;
default:
assert(0);
return;
}
stVal = bld.mkOp2v(op, atom->dType, bld.getSSA(), ld->getDef(0),
atom->getSrc(1));
}
// Unlocking store; it also defines the predicate value tested in failLockBB.
Instruction *st =
bld.mkStore(OP_STORE, TYPE_U32,
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
NULL, stVal);
st->setDef(0, pred->getDef(0));
st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL);
setAndUnlockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::TREE);
// Keep retrying the lock for as long as the store has not been performed.
bld.setPosition(failLockBB, true);
bld.mkFlow(OP_BRA, tryLockBB, CC_NOT_P, pred->getDef(0));
bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
failLockBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::BACK);
failLockBB->cfg.attach(&joinBB->cfg, Graph::Edge::TREE);
bld.setPosition(joinBB, false);
bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
}
void
NVC0LoweringPass::handleSharedATOM(Instruction *atom)
{
@@ -1176,11 +1288,16 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
sv = SV_LBASE;
break;
case FILE_MEMORY_SHARED:
handleSharedATOM(atom);
// For Fermi/Kepler, we have to use ld lock/st unlock to perform atomic
// operations on shared memory. For Maxwell, ATOMS is enough.
if (targ->getChipset() < NVISA_GK104_CHIPSET)
handleSharedATOM(atom);
else if (targ->getChipset() < NVISA_GM107_CHIPSET)
handleSharedATOMNVE4(atom);
return true;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
assert(base->reg.size == 8);
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
@@ -1204,9 +1321,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
// ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
return false;
if (targ->getChipset() < NVISA_GM107_CHIPSET) {
if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
// ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
return false;
}
}
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
@@ -1240,19 +1359,20 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
}
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off, uint16_t base)
{
uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
off += base;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
inline Value *
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off, uint16_t base)
{
uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
off += base;
if (ptr)
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
@@ -1262,10 +1382,10 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
}
inline Value *
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off, uint16_t base)
{
uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
off += base;
if (ptr)
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
@@ -1274,6 +1394,60 @@ NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
}
// Surface flavour of loadResInfo32(): the lookup is based at the driver's
// surface-info offset (io.suInfoBase).
inline Value *
NVC0LoweringPass::loadSuInfo32(Value *ptr, uint32_t off)
{
   const uint16_t suBase = prog->driver->io.suInfoBase;

   return loadResInfo32(ptr, off, suBase);
}
// Surface flavour of loadResInfo64(): the lookup is based at the driver's
// surface-info offset (io.suInfoBase).
inline Value *
NVC0LoweringPass::loadSuInfo64(Value *ptr, uint32_t off)
{
   const uint16_t suBase = prog->driver->io.suInfoBase;

   return loadResInfo64(ptr, off, suBase);
}
// Surface flavour of loadResLength32(): the lookup is based at the driver's
// surface-info offset (io.suInfoBase).
inline Value *
NVC0LoweringPass::loadSuLength32(Value *ptr, uint32_t off)
{
   const uint16_t suBase = prog->driver->io.suInfoBase;

   return loadResLength32(ptr, off, suBase);
}
// Buffer flavour of loadResInfo32(): the lookup is based at the driver's
// buffer-info offset (io.bufInfoBase).
inline Value *
NVC0LoweringPass::loadBufInfo32(Value *ptr, uint32_t off)
{
   const uint16_t bufBase = prog->driver->io.bufInfoBase;

   return loadResInfo32(ptr, off, bufBase);
}
// Buffer flavour of loadResInfo64(): the lookup is based at the driver's
// buffer-info offset (io.bufInfoBase).
inline Value *
NVC0LoweringPass::loadBufInfo64(Value *ptr, uint32_t off)
{
   const uint16_t bufBase = prog->driver->io.bufInfoBase;

   return loadResInfo64(ptr, off, bufBase);
}
// Buffer flavour of loadResLength32(): the lookup is based at the driver's
// buffer-info offset (io.bufInfoBase).
inline Value *
NVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off)
{
   const uint16_t bufBase = prog->driver->io.bufInfoBase;

   return loadResLength32(ptr, off, bufBase);
}
// UBO flavour of loadResInfo32(): the lookup is based at the driver's
// UBO-info offset (io.uboInfoBase).
inline Value *
NVC0LoweringPass::loadUboInfo32(Value *ptr, uint32_t off)
{
   const uint16_t uboBase = prog->driver->io.uboInfoBase;

   return loadResInfo32(ptr, off, uboBase);
}
// UBO flavour of loadResInfo64(): the lookup is based at the driver's
// UBO-info offset (io.uboInfoBase).
inline Value *
NVC0LoweringPass::loadUboInfo64(Value *ptr, uint32_t off)
{
   const uint16_t uboBase = prog->driver->io.uboInfoBase;

   return loadResInfo64(ptr, off, uboBase);
}
// UBO flavour of loadResLength32(): the lookup is based at the driver's
// UBO-info offset (io.uboInfoBase).
inline Value *
NVC0LoweringPass::loadUboLength32(Value *ptr, uint32_t off)
{
   const uint16_t uboBase = prog->driver->io.uboInfoBase;

   return loadResLength32(ptr, off, uboBase);
}
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
@@ -1354,8 +1528,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));
Value *ms_x = loadSuInfo32(NULL, base + NVE4_SU_INFO_MS(0));
Value *ms_y = loadSuInfo32(NULL, base + NVE4_SU_INFO_MS(1));
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
@@ -1408,9 +1582,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
for (c = 0; c < arg; ++c) {
src[c] = bld.getScratch();
if (c == 0 && raw)
v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
else
v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
->subOp = getSuClampSubOp(su, c);
}
@@ -1432,16 +1606,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
} else
if (dim == 3) {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_PITCH);
bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
} else {
assert(dim == 2);
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_PITCH);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
->subOp = su->tex.target.isArray() ?
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
@@ -1452,7 +1626,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
if (raw) {
bf = src[0];
} else {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_FMT);
bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
}
@@ -1469,7 +1643,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
case 2:
z = off;
if (!su->tex.target.isArray()) {
z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
z = loadSuInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
subOp = NV50_IR_SUBOP_SUBFM_3D;
}
break;
@@ -1484,7 +1658,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
}
// part 2
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_ADDR);
if (su->tex.target == TEX_TARGET_BUFFER) {
eau = v;
@@ -1493,7 +1667,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
}
// add array layer offset
if (su->tex.target.isArray()) {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
v = loadSuInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
if (dim == 1)
bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
@@ -1533,7 +1707,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
// let's just set it 0 for raw access and hope it works
v = raw ?
bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
bld.mkImm(0) : loadSuInfo32(NULL, base + NVE4_SU_INFO_FMT);
// get rid of old coordinate sources, make space for fmt info and predicate
su->moveSources(arg, 3 - arg);
@@ -1644,6 +1818,100 @@ NVC0LoweringPass::handleWRSV(Instruction *i)
return true;
}
// Lower a load/store instruction according to the memory file of its first
// source:
//   FILE_SHADER_INPUT  -> constant-buffer access (compute) or OP_VFETCH
//   FILE_MEMORY_CONST  -> global-memory access via a UBO address table
//                         (compute on GK104+), or an indirect-constbuf load
//   FILE_SHADER_OUTPUT -> OP_VFETCH (tessellation control only)
//   FILE_MEMORY_GLOBAL -> buffer access with a bounds-check predicate
// Instructions on any other file are left untouched.
void
NVC0LoweringPass::handleLDST(Instruction *i)
{
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
// Compute shader inputs are read from constant buffer 0.
i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
i->getSrc(0)->reg.fileIndex = 0;
} else
if (prog->getType() == Program::TYPE_GEOMETRY &&
i->src(0).isIndirect(0)) {
// XXX: this assumes vec4 units
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
prog->getType() == Program::TYPE_COMPUTE) {
// The launch descriptor only allows setting up 8 CBs, but OpenGL
// requires at least 12 UBOs. To bypass this limitation, we store the
// addresses in the driver constbuf and load directly from global
// memory.
// NOTE(review): fileIndex is reg.fileIndex - 1 narrowed to int8_t and is
// fed to loadUboInfo64() before the "fileIndex >= 0" test below; the
// indirect branch never checks it -- confirm a negative index cannot
// reach these loads.
int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1;
Value *ind = i->getIndirect(0, 1);
Value *ptr = loadUboInfo64(ind, fileIndex * 16);
// TODO: clamp the offset to the maximum number of const buf.
if (i->src(0).isIndirect(1)) {
// Indirectly indexed constbuf: bounds-check the access end
// (offset + access size) against the UBO length.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
Value *length = loadUboLength32(ind, fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
// pred is set when offset > length; the access only executes when pred
// is not set.
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);
if (i->defExists(0)) {
// Presumably so that a load skipped by the predicate leaves 0 in the
// destination -- confirm.
bld.mkMov(i->getDef(0), bld.mkImm(0));
}
} else if (fileIndex >= 0) {
// Directly indexed constbuf with index >= 1: plain global access
// through the stored address, no bounds check.
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
}
i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
}
} else if (i->src(0).isIndirect(1)) {
// Indirect constbuf index: fold the buffer index (and the address
// indirect, when present) into one pointer value and mark the load with
// the LDC_IS sub-op.
Value *ptr;
if (i->src(0).isIndirect(0))
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(0x1010),
i->getIndirect(0, 0));
else
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(16));
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
i->subOp = NV50_IR_SUBOP_LDC_IS;
}
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
// Buffer access: fetch the buffer base address and length from the
// driver's buffer-info table, then predicate the access on a bounds check.
Value *ind = i->getIndirect(0, 1);
Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
// XXX come up with a way not to do this for EVERY little access but
// rather to batch these up somehow. Unfortunately we've lost the
// information about the field width by the time we get here.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
Value *length = loadBufLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
// pred is set when offset > length; the access only executes when pred is
// not set.
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);
if (i->defExists(0)) {
// Presumably so that a load skipped by the predicate leaves 0 in the
// destination -- confirm.
bld.mkMov(i->getDef(0), bld.mkImm(0));
}
}
}
void
NVC0LoweringPass::readTessCoord(LValue *dst, int c)
{
@@ -1969,60 +2237,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleWRSV(i);
case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
i->getSrc(0)->reg.fileIndex = 0;
} else
if (prog->getType() == Program::TYPE_GEOMETRY &&
i->src(0).isIndirect(0)) {
// XXX: this assumes vec4 units
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
if (i->src(0).isIndirect(1)) {
Value *ptr;
if (i->src(0).isIndirect(0))
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(0x1010),
i->getIndirect(0, 0));
else
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(16));
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
i->subOp = NV50_IR_SUBOP_LDC_IS;
}
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
Value *ind = i->getIndirect(0, 1);
Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
// XXX come up with a way not to do this for EVERY little access but
// rather to batch these up somehow. Unfortunately we've lost the
// information about the field width by the time we get here.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);
if (i->defExists(0)) {
bld.mkMov(i->getDef(0), bld.mkImm(0));
}
}
handleLDST(i);
break;
case OP_ATOM:
{
@@ -106,6 +106,8 @@ protected:
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
void handleSharedATOM(Instruction *);
void handleSharedATOMNVE4(Instruction *);
void handleLDST(Instruction *);
void checkPredicate(Instruction *);
@@ -117,9 +119,18 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
Value *loadResInfo64(Value *ptr, uint32_t off);
Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
Value *loadSuInfo32(Value *ptr, uint32_t off);
Value *loadSuInfo64(Value *ptr, uint32_t off);
Value *loadSuLength32(Value *ptr, uint32_t off);
Value *loadBufInfo32(Value *ptr, uint32_t off);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
Value *loadUboInfo32(Value *ptr, uint32_t off);
Value *loadUboInfo64(Value *ptr, uint32_t off);
Value *loadUboLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
@@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
static bool
isShortRegVal(LValue *lval)
{
if (lval->defs.size() == 0)
if (lval->getInsn() == NULL)
return false;
for (Value::DefCIterator def = lval->defs.begin();
def != lval->defs.end(); ++def)
@@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
nodes[i].init(regs, lval);
RIG.insert(&nodes[i]);
if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
prog->getTarget()->getChipset() < 0xc0) {
Instruction *insn = lval->getInsn();
if (insn->op == OP_MAD || insn->op == OP_SAD)
@@ -67,9 +67,18 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
break;
}
if (bindings & PIPE_BIND_LINEAR)
if (util_format_is_depth_or_stencil(format) ||
(target != PIPE_TEXTURE_1D &&
target != PIPE_TEXTURE_2D &&
target != PIPE_TEXTURE_RECT) ||
sample_count > 1)
return false;
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_LINEAR |
PIPE_BIND_SHARED);
return (( nv50_format_table[format].usage |
@@ -110,9 +110,18 @@
/* 32 texture handles, at one 32-bit integer each */
#define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4
#define NVC0_CB_AUX_TEX_SIZE (32 * 4)
/* 8 sets of 32-bit coordinate offsets */
#define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */
#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
/* block/grid size, at three 32-bit integers each, and gridid */
#define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */
#define NVC0_CB_AUX_GRID_SIZE (7 * 4)
/* 8 user clip planes, at four 32-bit floats each */
#define NVC0_CB_AUX_UCP_INFO 0x100
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
/* 13 UBOs, at four 32-bit integers each */
#define NVC0_CB_AUX_UBO_INFO(i) 0x100 + (i) * 4 * 4 /* CP */
#define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
/* 8 sets of 32-bit integer pairs (sample offsets) */
#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
@@ -540,24 +540,24 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.auxCBSlot = 0;
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
} else {
info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.auxCBSlot = 7;
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.suInfoBase = 0; /* TODO */
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->io.suInfoBase = 0; /* TODO */
}
info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
info->io.suInfoBase = 0; /* TODO */
}
info->assignSlots = nvc0_program_assign_varying_slots;
+11 -3
View File
@@ -57,9 +57,18 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
if (util_format_get_blocksizebits(format) == 3 * 32)
return false;
if (bindings & PIPE_BIND_LINEAR)
if (util_format_is_depth_or_stencil(format) ||
(target != PIPE_TEXTURE_1D &&
target != PIPE_TEXTURE_2D &&
target != PIPE_TEXTURE_RECT) ||
sample_count > 1)
return false;
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_LINEAR |
PIPE_BIND_SHARED);
return (( nvc0_format_table[format].usage |
@@ -282,7 +291,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
if (class_3d >= NVE4_3D_CLASS)
if (class_3d == NVF0_3D_CLASS &&
!debug_get_bool_option("NVF0_COMPUTE", false))
return 0;
return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
@@ -311,8 +321,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return 65536;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
return NVC0_MAX_PIPE_CONSTBUFS;
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return shader != PIPE_SHADER_FRAGMENT;
@@ -16,7 +16,6 @@
/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
#define NVC0_MAX_PIPE_CONSTBUFS 14
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
#define NVC0_MAX_SURFACE_SLOTS 16
@@ -1295,6 +1295,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 |
NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
}
for (i = 1; i < n; ++i)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
if (nvc0->state.instance_elts) {
nvc0->state.instance_elts = 0;
BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
@@ -1303,6 +1305,17 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
}
nvc0->state.num_vtxelts = 2;
if (nvc0->state.prim_restart) {
IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
nvc0->state.prim_restart = 0;
}
if (nvc0->state.index_bias) {
IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
nvc0->state.index_bias = 0;
}
for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
if (info->dst.box.z + i) {
BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+217 -55
View File
@@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
int i;
int ret;
uint32_t obj_class;
uint64_t address;
switch (dev->chipset & ~0xf) {
case 0x100:
@@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
return ret;
}
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
&screen->parm);
if (ret)
return ret;
@@ -95,9 +96,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
* accessible. We cannot prevent that at the moment, so expect failure.
*/
BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
PUSH_DATA (push, 1 << 24);
PUSH_DATA (push, 0xff << 24);
BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
PUSH_DATA (push, 2 << 24);
PUSH_DATA (push, 0xfe << 24);
BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
@@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
}
BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
PUSH_DATA (push, 0); /* does not interefere with 3D */
PUSH_DATA (push, 7); /* does not interfere with 3D */
if (obj_class == NVF0_COMPUTE_CLASS)
IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
@@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 3); /* 7 */
PUSH_DATA (push, 1);
#ifdef DEBUG
#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
@@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
uint32_t mask;
unsigned i;
const unsigned t = 1;
uint64_t address;
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
mask = nvc0->surfaces_dirty[t];
while (mask) {
@@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
* directly instead of via binding points, so we have to supply them.
*/
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
@@ -271,6 +277,7 @@ static void
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_screen *screen = nvc0->screen;
uint64_t address;
const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
unsigned i, n;
@@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
n = util_logbase2(dirty) + 1 - i;
assert(n);
address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, n * 4);
PUSH_DATA (push, 0x1);
@@ -301,6 +308,103 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
nvc0->samplers_dirty[s] = 0;
}
/*
 * Upload every dirty constant buffer of shader stage 5 (presumably the
 * compute stage -- confirm against nvc0_shader_stage()) through the NVE4
 * compute upload method.
 *
 * - A buffer flagged as `user` must be slot 0; its data is copied inline
 *   into screen->uniform_bo at NVC0_CB_USR_INFO(s).
 * - Any other slot with a bound resource gets a 4-dword record
 *   (address low, address high, size, 0) written at
 *   NVC0_CB_AUX_UBO_INFO(i - 1) inside the NVC0_CB_AUX_INFO(s) region of
 *   screen->uniform_bo, and the resource is referenced for reading.
 *
 * The dirty mask is cleared bit by bit, and a constant-buffer flush is
 * emitted once all slots have been processed.
 */
static void
nve4_compute_validate_constbufs(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const int s = 5;

   while (nvc0->constbuf_dirty[s]) {
      /* lowest dirty slot first */
      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
      nvc0->constbuf_dirty[s] &= ~(1 << i);

      if (nvc0->constbuf[s][i].user) {
         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
         const unsigned base = NVC0_CB_USR_INFO(s);
         const unsigned size = nvc0->constbuf[s][0].size;
         assert(i == 0); /* we really only want OpenGL uniforms here */
         assert(nvc0->constbuf[s][0].u.data);

         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, bo->offset + base);
         PUSH_DATA (push, bo->offset + base);
         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
         PUSH_DATA (push, size);
         PUSH_DATA (push, 0x1);
         /* one header dword followed by size / 4 dwords of uniform data */
         BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4));
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
         PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4);
      }
      else {
         struct nv04_resource *res =
            nv04_resource(nvc0->constbuf[s][i].u.buf);
         if (res) {
            uint64_t address
               = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

            assert(i > 0); /* we really only want uniform buffer objects */

            BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
            PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
            PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
            BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
            PUSH_DATA (push, 4 * 4);
            PUSH_DATA (push, 0x1);
            BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
            PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

            /* record layout: address (low, high), size, padding */
            PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
            PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
            /* use the stage index `s` like every other access here */
            PUSH_DATA (push, nvc0->constbuf[s][i].size);
            PUSH_DATA (push, 0);
            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);

            res->cb_bindings[s] |= 1 << i;
         }
      }
   }

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
/*
 * Rewrite the whole buffer-descriptor table of shader stage 5 in one linear
 * upload.  The table lives at NVC0_CB_AUX_BUF_INFO(0) inside the
 * NVC0_CB_AUX_INFO(s) region of screen->uniform_bo and holds
 * NVC0_MAX_BUFFERS entries of four dwords each:
 * address low, address high, size, 0.  Unbound slots are written as zeros.
 */
static void
nve4_compute_validate_buffers(struct nvc0_context *nvc0)
{
   const int s = 5;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint64_t address;
   int slot;

   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

   /* destination and length of the upload */
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(0));
   PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(0));
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 4 * NVC0_MAX_BUFFERS * 4);
   PUSH_DATA (push, 0x1);

   /* one header dword, then four dwords per slot */
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4 * NVC0_MAX_BUFFERS);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

   for (slot = 0; slot < NVC0_MAX_BUFFERS; slot++) {
      struct nv04_resource *res;

      if (!nvc0->buffers[s][slot].buffer) {
         /* nothing bound: emit an all-zero descriptor */
         int dw;
         for (dw = 0; dw < 4; dw++)
            PUSH_DATA (push, 0);
         continue;
      }

      res = nv04_resource(nvc0->buffers[s][slot].buffer);
      PUSH_DATA (push, res->address + nvc0->buffers[s][slot].buffer_offset);
      PUSH_DATAh(push, res->address + nvc0->buffers[s][slot].buffer_offset);
      PUSH_DATA (push, nvc0->buffers[s][slot].buffer_size);
      PUSH_DATA (push, 0);
      BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
   }
}
static struct nvc0_state_validate
validate_list_cp[] = {
{ nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM },
@@ -310,6 +414,8 @@ validate_list_cp[] = {
NVC0_NEW_CP_SAMPLERS },
{ nve4_compute_validate_surfaces, NVC0_NEW_CP_SURFACES },
{ nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS },
{ nve4_compute_validate_buffers, NVC0_NEW_CP_BUFFERS },
{ nve4_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF },
};
static bool
@@ -327,13 +433,16 @@ nve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
}
static void
nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
const uint *block_layout,
const uint *grid_layout)
nve4_compute_upload_input(struct nvc0_context *nvc0,
struct nve4_cp_launch_desc *desc,
const struct pipe_grid_info *info)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
uint64_t address;
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
if (cp->parm_size) {
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
@@ -344,18 +453,38 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
PUSH_DATA (push, 0x1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, input, cp->parm_size / 4);
PUSH_DATAp(push, info->input, cp->parm_size / 4);
/* Bind user parameters coming from clover. */
/* TODO: This should be harmonized with uniform_bo. */
assert(!(desc->cb_mask & (1 << 0)));
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
}
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 7 * 4);
PUSH_DATA (push, 0x1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, block_layout, 3);
PUSH_DATAp(push, grid_layout, 3);
if (unlikely(info->indirect)) {
struct nv04_resource *res = nv04_resource(info->indirect);
uint32_t offset = res->offset + info->indirect_offset;
nouveau_pushbuf_space(push, 16, 0, 1);
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, info->block, 3);
nouveau_pushbuf_data(push, res->bo, offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
} else {
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, info->block, 3);
PUSH_DATAp(push, info->grid, 3);
}
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
@@ -375,24 +504,21 @@ nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
static void
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
struct nve4_cp_launch_desc *desc,
uint32_t label,
const uint *block_layout,
const uint *grid_layout)
const struct pipe_grid_info *info)
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
unsigned i;
nve4_cp_launch_desc_init_default(desc);
desc->entry = nvc0_program_symbol_offset(cp, label);
desc->entry = nvc0_program_symbol_offset(cp, info->pc);
desc->griddim_x = grid_layout[0];
desc->griddim_y = grid_layout[1];
desc->griddim_z = grid_layout[2];
desc->blockdim_x = block_layout[0];
desc->blockdim_y = block_layout[1];
desc->blockdim_z = block_layout[2];
desc->griddim_x = info->grid[0];
desc->griddim_y = info->grid[1];
desc->griddim_z = info->grid[2];
desc->blockdim_x = info->block[0];
desc->blockdim_y = info->block[1];
desc->blockdim_z = info->block[2];
desc->shared_size = align(cp->cp.smem_size, 0x100);
desc->local_size_p = align(cp->cp.lmem_size, 0x10);
@@ -403,12 +529,15 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
desc->gpr_alloc = cp->num_gprs;
desc->bar_alloc = cp->num_barriers;
for (i = 0; i < 7; ++i) {
const unsigned s = 5;
if (nvc0->constbuf[s][i].u.buf)
nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
// Only bind OpenGL uniforms and the driver constant buffer through the
// launch descriptor because UBOs are sticked to the driver cb to avoid the
// limitation of 8 CBs.
if (nvc0->constbuf[5][0].user) {
nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
}
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
NVC0_CB_AUX_INFO(5), 1 << 10);
}
static inline struct nve4_cp_launch_desc *
@@ -448,29 +577,62 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
if (ret)
goto out;
nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
info->block, info->grid);
nve4_compute_setup_launch_desc(nvc0, desc, info);
nve4_compute_upload_input(nvc0, desc, info);
#ifdef DEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0))
nve4_compute_dump_launch_desc(desc);
#endif
nve4_compute_upload_input(nvc0, info->input, info->block, info->grid);
if (unlikely(info->indirect)) {
struct nv04_resource *res = nv04_resource(info->indirect);
uint32_t offset = res->offset + info->indirect_offset;
/* upload the descriptor */
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr);
PUSH_DATA (push, desc_gpuaddr);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 256);
PUSH_DATA (push, 1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
/* overwrite griddim_x and griddim_y as two 32-bits integers even
* if griddim_y must be a 16-bits integer */
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr + 48);
PUSH_DATA (push, desc_gpuaddr + 48);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 8);
PUSH_DATA (push, 1);
nouveau_pushbuf_space(push, 16, 0, 1);
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (8 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
nouveau_pushbuf_data(push, res->bo, offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | 2 * 4);
/* overwrite the 16 high bits of griddim_y with griddim_z because
* we need (z << 16) | x */
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr + 54);
PUSH_DATA (push, desc_gpuaddr + 54);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 4);
PUSH_DATA (push, 1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (4 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
nouveau_pushbuf_data(push, res->bo, offset + 8,
NVC0_IB_ENTRY_1_NO_PREFETCH | 1 * 4);
}
/* upload descriptor and flush */
#if 0
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr);
PUSH_DATA (push, desc_gpuaddr);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 256);
PUSH_DATA (push, 1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
#endif
BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
PUSH_DATA (push, desc_gpuaddr >> 8);
BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
@@ -495,7 +657,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const unsigned s = 5;
unsigned i;
uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
uint32_t commands[2][32];
unsigned n[2] = { 0, 0 };
for (i = 0; i < nvc0->num_textures[s]; ++i) {
@@ -4,31 +4,6 @@
#include "nvc0/nve4_compute.xml.h"
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
*/
#define NVE4_CP_INPUT_USER 0x0000
#define NVE4_CP_INPUT_USER_LIMIT 0x1000
#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
#define NVE4_CP_INPUT_GRIDID 0x1018
#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
#define NVE4_CP_INPUT_TEX_STRIDE 4
#define NVE4_CP_INPUT_TEX_MAX 32
#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
#define NVE4_CP_INPUT_SUF_STRIDE 64
#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
#define NVE4_CP_INPUT_SUF_MAX 32
#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900
#define NVE4_CP_INPUT_TEMP_PTR 0x1908
#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910
#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918
#define NVE4_CP_INPUT_SIZE 0x1a00
#define NVE4_CP_PARAM_TRAP_INFO 0x2000
#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)
#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
struct nve4_cp_launch_desc
{
u32 unk0[8];
@@ -81,7 +56,7 @@ static inline void
nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
struct nouveau_bo *bo,
uint32_t base, uint16_t size)
uint32_t base, uint32_t size)
{
uint64_t address = bo->offset + base;
@@ -95,23 +70,6 @@ nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
desc->cb_mask |= 1 << index;
}
static inline void
nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
const struct nvc0_constbuf *cb)
{
assert(index < 8);
if (!cb->u.buf) {
desc->cb_mask &= ~(1 << index);
} else {
const struct nv04_resource *buf = nv04_resource(cb->u.buf);
assert(!cb->user);
nve4_cp_launch_desc_set_cb(desc, index,
buf->bo, buf->offset + cb->offset, cb->size);
}
}
struct nve4_mp_trap_info {
u32 lock;
u32 pc;
@@ -299,6 +299,11 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
if ((usage & PIPE_BIND_LINEAR) &&
!util_format_is_compressed(format) &&
!(usage & PIPE_BIND_DEPTH_STENCIL))
retval |= PIPE_BIND_LINEAR;
return retval == usage;
}
+5
View File
@@ -239,6 +239,11 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
if ((usage & PIPE_BIND_LINEAR) &&
!util_format_is_compressed(format) &&
!(usage & PIPE_BIND_DEPTH_STENCIL))
retval |= PIPE_BIND_LINEAR;
return retval == usage;
}
@@ -467,6 +467,8 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
case CHIP_POLARIS10: return "AMD POLARIS10";
case CHIP_POLARIS11: return "AMD POLARIS11";
case CHIP_STONEY: return "AMD STONEY";
default: return "AMD unknown";
}
@@ -597,6 +599,13 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
#else
case CHIP_FIJI: return "fiji";
case CHIP_STONEY: return "stoney";
#endif
#if HAVE_LLVM <= 0x0308
case CHIP_POLARIS10: return "tonga";
case CHIP_POLARIS11: return "tonga";
#else
case CHIP_POLARIS10: return "polaris10";
case CHIP_POLARIS11: return "polaris11";
#endif
default: return "";
}
+1 -1
View File
@@ -1066,7 +1066,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
item_mask = 0x3;
}
while(num_tile_pipes--) {
while (num_tile_pipes--) {
i = backend_map & item_mask;
mask |= (1<<i);
backend_map >>= item_width;
+1 -1
View File
@@ -335,7 +335,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
*/
if (resource->target != PIPE_BUFFER &&
(resource->nr_samples > 1 || rtex->is_depth))
return NULL;
return false;
if (!res->is_shared) {
res->is_shared = true;
+5 -1
View File
@@ -50,6 +50,7 @@
#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
/**
* flush commands to the hardware
@@ -408,7 +409,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
rscreen->info.drm_major == 3)
enc->use_vui = true;
if (rscreen->info.family >= CHIP_TONGA &&
rscreen->info.family != CHIP_STONEY)
rscreen->info.family != CHIP_STONEY &&
rscreen->info.family != CHIP_POLARIS11)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
if ((rscreen->info.family >= CHIP_TONGA) &&
@@ -482,6 +484,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
break;
case FW_52_0_3:
case FW_52_4_3:
radeon_vce_52_init(enc);
break;
@@ -514,6 +517,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
case FW_50_10_2:
case FW_50_17_3:
case FW_52_0_3:
case FW_52_4_3:
return true;
default:
return false;
@@ -124,6 +124,8 @@ enum radeon_family {
CHIP_CARRIZO,
CHIP_FIJI,
CHIP_STONEY,
CHIP_POLARIS10,
CHIP_POLARIS11,
CHIP_LAST,
};
+2
View File
@@ -598,6 +598,8 @@ static bool si_init_gs_info(struct si_screen *sscreen)
case CHIP_HAWAII:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
sscreen->gs_table_depth = 32;
return true;
default:
+5 -5
View File
@@ -39,6 +39,7 @@
#include "radeon/radeon_llvm_emit.h"
#include "util/u_memory.h"
#include "util/u_pstipple.h"
#include "util/u_string.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_util.h"
@@ -2874,8 +2875,7 @@ static LLVMValueRef image_fetch_coords(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Memory.Texture;
int sample;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &sample);
unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
LLVMValueRef coords[4];
LLVMValueRef tmp;
int chan;
@@ -3387,8 +3387,8 @@ static void tex_fetch_args(
unsigned target = inst->Texture.Texture;
LLVMValueRef coords[5], derivs[6];
LLVMValueRef address[16];
int ref_pos;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
unsigned count = 0;
unsigned chan;
unsigned num_deriv_channels = 0;
@@ -4996,7 +4996,7 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary
line = binary->disasm_string;
while (*line) {
p = strchrnul(line, '\n');
p = util_strchrnul(line, '\n');
count = p - line;
if (count) {
+13
View File
@@ -2046,6 +2046,11 @@ boolean si_is_format_supported(struct pipe_screen *screen,
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
if ((usage & PIPE_BIND_LINEAR) &&
!util_format_is_compressed(format) &&
!(usage & PIPE_BIND_DEPTH_STENCIL))
retval |= PIPE_BIND_LINEAR;
return retval == usage;
}
@@ -3946,6 +3951,14 @@ static void si_init_config(struct si_context *sctx)
raster_config_1 = 0x0000002e;
}
break;
case CHIP_POLARIS10:
raster_config = 0x16000012;
raster_config_1 = 0x0000002a;
break;
case CHIP_POLARIS11:
raster_config = 0x16000012;
raster_config_1 = 0x00000000;
break;
case CHIP_TONGA:
raster_config = 0x16000012;
raster_config_1 = 0x0000002a;
@@ -10,6 +10,7 @@ C_SOURCES := \
sp_flush.h \
sp_fs_exec.c \
sp_fs.h \
sp_image.c \
sp_limits.h \
sp_prim_vbuf.c \
sp_prim_vbuf.h \
@@ -31,6 +32,7 @@ C_SOURCES := \
sp_state_blend.c \
sp_state_clip.c \
sp_state_derived.c \
sp_state_image.c \
sp_state.h \
sp_state_rasterizer.c \
sp_state_sampler.c \
+18 -2
View File
@@ -50,7 +50,7 @@
#include "sp_query.h"
#include "sp_screen.h"
#include "sp_tex_sample.h"
#include "sp_image.h"
static void
softpipe_destroy( struct pipe_context *pipe )
@@ -199,6 +199,10 @@ softpipe_create_context(struct pipe_screen *screen,
softpipe->tgsi.sampler[i] = sp_create_tgsi_sampler();
}
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
softpipe->tgsi.image[i] = sp_create_tgsi_image();
}
softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
@@ -216,6 +220,7 @@ softpipe_create_context(struct pipe_screen *screen,
softpipe_init_streamout_funcs(&softpipe->pipe);
softpipe_init_texture_funcs( &softpipe->pipe );
softpipe_init_vertex_funcs(&softpipe->pipe);
softpipe_init_image_funcs(&softpipe->pipe);
softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
@@ -223,7 +228,8 @@ softpipe_create_context(struct pipe_screen *screen,
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush_wrapped;
softpipe->pipe.texture_barrier = softpipe_texture_barrier;
softpipe->pipe.memory_barrier = softpipe_memory_barrier;
softpipe->pipe.render_condition = softpipe_render_condition;
/*
@@ -272,6 +278,16 @@ softpipe_create_context(struct pipe_screen *screen,
(struct tgsi_sampler *)
softpipe->tgsi.sampler[PIPE_SHADER_GEOMETRY]);
draw_image(softpipe->draw,
PIPE_SHADER_VERTEX,
(struct tgsi_image *)
softpipe->tgsi.image[PIPE_SHADER_VERTEX]);
draw_image(softpipe->draw,
PIPE_SHADER_GEOMETRY,
(struct tgsi_image *)
softpipe->tgsi.image[PIPE_SHADER_GEOMETRY]);
if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
@@ -83,6 +83,7 @@ struct softpipe_context {
struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
@@ -172,9 +173,12 @@ struct softpipe_context {
/** TGSI exec things */
struct {
struct sp_tgsi_sampler *sampler[PIPE_SHADER_TYPES];
struct sp_tgsi_image *image[PIPE_SHADER_TYPES];
} tgsi;
struct tgsi_exec_machine *fs_machine;
/** whether early depth testing is enabled */
bool early_depth;
/** The primitive drawing context */
struct draw_context *draw;
+26
View File
@@ -168,3 +168,29 @@ softpipe_flush_resource(struct pipe_context *pipe,
return TRUE;
}
/**
 * Flush the texture tile caches of every bound sampler view, then the
 * colour-buffer and depth/stencil tile caches, and mark the render cache
 * as clean.
 */
void softpipe_texture_barrier(struct pipe_context *pipe)
{
   struct softpipe_context *sp = softpipe_context(pipe);
   unsigned shader, unit, cb;

   /* per-shader-stage texture caches */
   for (shader = 0; shader < Elements(sp->tex_cache); shader++) {
      for (unit = 0; unit < sp->num_sampler_views[shader]; unit++)
         sp_flush_tex_tile_cache(sp->tex_cache[shader][unit]);
   }

   /* colour buffers: skip slots without a cache */
   for (cb = 0; cb < sp->framebuffer.nr_cbufs; cb++) {
      if (!sp->cbuf_cache[cb])
         continue;
      sp_flush_tile_cache(sp->cbuf_cache[cb]);
   }

   /* depth/stencil buffer */
   if (sp->zsbuf_cache) {
      sp_flush_tile_cache(sp->zsbuf_cache);
   }

   sp->dirty_render_cache = FALSE;
}
/**
 * Memory barrier entry point.  The requested \p flags are not inspected:
 * every call performs the same flushes as softpipe_texture_barrier().
 */
void softpipe_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
   (void) flags;

   softpipe_texture_barrier(pipe);
}
+2
View File
@@ -55,4 +55,6 @@ softpipe_flush_resource(struct pipe_context *pipe,
boolean cpu_access,
boolean do_not_block);
void softpipe_texture_barrier(struct pipe_context *pipe);
void softpipe_memory_barrier(struct pipe_context *pipe, unsigned flags);
#endif
+15 -9
View File
@@ -62,14 +62,15 @@ sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
static void
exec_prepare( const struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler )
struct tgsi_sampler *sampler,
struct tgsi_image *image )
{
/*
* Bind tokens/shader to the interpreter's machine state.
*/
tgsi_exec_machine_bind_shader(machine,
var->tokens,
sampler);
sampler, image);
}
@@ -116,7 +117,8 @@ setup_pos_vector(const struct tgsi_interp_coef *coef,
static unsigned
exec_run( const struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine,
struct quad_header *quad )
struct quad_header *quad,
bool early_depth_test )
{
/* Compute X, Y, Z, W vals for this quad */
setup_pos_vector(quad->posCoef,
@@ -126,6 +128,7 @@ exec_run( const struct sp_fragment_shader_variant *var,
/* convert 0 to 1.0 and 1 to -1.0 */
machine->Face = (float) (quad->input.facing * -2 + 1);
machine->NonHelperMask = quad->inout.mask;
quad->inout.mask &= tgsi_exec_machine_run( machine );
if (quad->inout.mask == 0)
return FALSE;
@@ -155,16 +158,19 @@ exec_run( const struct sp_fragment_shader_variant *var,
{
uint j;
for (j = 0; j < 4; j++)
quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
if (!early_depth_test) {
for (j = 0; j < 4; j++)
quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
}
}
break;
case TGSI_SEMANTIC_STENCIL:
{
uint j;
for (j = 0; j < 4; j++)
quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].u[j];
if (!early_depth_test) {
for (j = 0; j < 4; j++)
quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].u[j];
}
}
break;
}
@@ -180,7 +186,7 @@ exec_delete(struct sp_fragment_shader_variant *var,
struct tgsi_exec_machine *machine)
{
if (machine->Tokens == var->tokens) {
tgsi_exec_machine_bind_shader(machine, NULL, NULL);
tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL);
}
FREE( (void *) var->tokens );
+762
View File
@@ -0,0 +1,762 @@
/*
* Copyright 2016 Red Hat.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sp_context.h"
#include "sp_image.h"
#include "sp_texture.h"
#include "util/u_format.h"
/*
 * Return the byte offset, from the start of the resource's data, of the
 * image that a texel access should address.
 *
 * Buffers: offset of the view's first element, using the block size of
 * 'format'.
 * Textures: offset of the view's mip level; for array, cube and 3D
 * resources the layer is r_coord plus the view's first layer, otherwise
 * layer 0 is used.
 */
static uint32_t
get_image_offset(const struct softpipe_resource *spr,
                 const struct pipe_image_view *iview,
                 enum pipe_format format, unsigned r_coord)
{
   int layer;

   switch (spr->base.target) {
   case PIPE_BUFFER:
      return iview->u.buf.first_element * util_format_get_blocksize(format);

   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_3D:
      /* layered resources: r selects the slice relative to the view */
      layer = r_coord + iview->u.tex.first_layer;
      break;

   default:
      layer = 0;
      break;
   }

   return softpipe_get_tex_image_offset(spr, iview->u.tex.level, layer);
}
/*
 * True when the TGSI texture target carries a third (layer or depth)
 * coordinate: 3D, cube, and every array target.
 */
static inline bool
has_layer_or_depth(unsigned tgsi_tex_instr)
{
   switch (tgsi_tex_instr) {
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return true;
   default:
      return false;
   }
}
/*
 * True when the TGSI texture target is addressed by a single spatial
 * coordinate (buffer, 1D, or 1D array), i.e. there is no 't' row.
 */
static inline bool
has_1coord(unsigned tgsi_tex_instr)
{
   switch (tgsi_tex_instr) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_1D_ARRAY:
      return true;
   default:
      return false;
   }
}
/*
 * Return true only if (s, t, r) lies inside the half-open box
 * [0, width) x [0, height) x [0, depth).
 */
static inline bool
bounds_check(int width, int height, int depth,
             int s, int t, int r)
{
   const bool s_ok = s >= 0 && s < width;
   const bool t_ok = t >= 0 && t < height;
   const bool r_ok = r >= 0 && r < depth;

   return s_ok && t_ok && r_ok;
}
/*
 * Check whether a shader's TGSI texture target may be used to access an
 * image resource of the given pipe target.
 *
 * Layered resources (3D, cube, arrays) additionally accept the
 * corresponding non-layered TGSI target, as listed per case below.
 * Unknown pipe targets are never compatible.
 */
static inline bool
has_compat_target(unsigned pipe_target, unsigned tgsi_target)
{
   switch (pipe_target) {
   case PIPE_TEXTURE_1D:
      return tgsi_target == TGSI_TEXTURE_1D;

   case PIPE_TEXTURE_2D:
      return tgsi_target == TGSI_TEXTURE_2D;

   case PIPE_TEXTURE_RECT:
      return tgsi_target == TGSI_TEXTURE_RECT;

   case PIPE_TEXTURE_3D:
      return tgsi_target == TGSI_TEXTURE_3D ||
             tgsi_target == TGSI_TEXTURE_2D;

   case PIPE_TEXTURE_CUBE:
      return tgsi_target == TGSI_TEXTURE_CUBE ||
             tgsi_target == TGSI_TEXTURE_2D;

   case PIPE_TEXTURE_1D_ARRAY:
      return tgsi_target == TGSI_TEXTURE_1D ||
             tgsi_target == TGSI_TEXTURE_1D_ARRAY;

   case PIPE_TEXTURE_2D_ARRAY:
      return tgsi_target == TGSI_TEXTURE_2D ||
             tgsi_target == TGSI_TEXTURE_2D_ARRAY;

   case PIPE_TEXTURE_CUBE_ARRAY:
      return tgsi_target == TGSI_TEXTURE_CUBE ||
             tgsi_target == TGSI_TEXTURE_CUBE_ARRAY ||
             tgsi_target == TGSI_TEXTURE_2D;

   case PIPE_BUFFER:
      return tgsi_target == TGSI_TEXTURE_BUFFER;

   default:
      return false;
   }
}
static bool
get_dimensions(const struct pipe_image_view *iview,
const struct softpipe_resource *spr,
unsigned tgsi_tex_instr,
enum pipe_format pformat,
unsigned *width,
unsigned *height,
unsigned *depth)
{
if (tgsi_tex_instr == TGSI_TEXTURE_BUFFER) {
*width = iview->u.buf.last_element - iview->u.buf.first_element + 1;
*height = 1;
*depth = 1;
/*
* Bounds check the buffer size from the view
* and the buffer size from the underlying buffer.
*/
if (util_format_get_stride(pformat, *width) >
util_format_get_stride(spr->base.format, spr->base.width0))
return false;
} else {
unsigned level;
level = spr->base.target == PIPE_BUFFER ? 0 : iview->u.tex.level;
*width = u_minify(spr->base.width0, level);
*height = u_minify(spr->base.height0, level);
if (spr->base.target == TGSI_TEXTURE_3D)
*depth = u_minify(spr->base.depth0, level);
else
*depth = spr->base.array_size;
/* Make sure the resource and view have compatiable formats */
if (util_format_get_blocksize(pformat) >
util_format_get_blocksize(spr->base.format))
return false;
}
return true;
}
/*
 * Pick the (s, t, r) texel coordinates for quad lane 'index'.
 *
 * s is always taken from s[].  t is forced to 0 for single-coordinate
 * targets.  r is 0 for targets without a layer/depth coordinate; for 1D
 * arrays the layer is taken from t[], otherwise from r[].
 */
static void
fill_coords(const struct tgsi_image_params *params,
            unsigned index,
            const int s[TGSI_QUAD_SIZE],
            const int t[TGSI_QUAD_SIZE],
            const int r[TGSI_QUAD_SIZE],
            int *s_coord, int *t_coord, int *r_coord)
{
   *s_coord = s[index];

   if (has_1coord(params->tgsi_tex_instr))
      *t_coord = 0;
   else
      *t_coord = t[index];

   if (!has_layer_or_depth(params->tgsi_tex_instr))
      *r_coord = 0;
   else if (params->tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY)
      *r_coord = t[index];   /* 1D array: layer is the second coordinate */
   else
      *r_coord = r[index];
}
/*
 * Implement the image LOAD operation.
 *
 * For each of the TGSI_QUAD_SIZE lanes, read the texel addressed by
 * (s, t, r) from the image bound at params->unit and store its four
 * channels in rgba[c][lane].  Pure-integer formats are stored as raw
 * 32-bit integers in the float array.
 *
 * Lanes that are masked out by params->execmask or whose coordinates are
 * out of bounds read as zero, with the fourth channel set to one when the
 * format has fewer than four components.
 *
 * If the unit is invalid, nothing is bound, the target is incompatible or
 * the format/size validation fails, every channel of every lane is set to
 * zero.
 */
static void
sp_tgsi_load(const struct tgsi_image *image,
             const struct tgsi_image_params *params,
             const int s[TGSI_QUAD_SIZE],
             const int t[TGSI_QUAD_SIZE],
             const int r[TGSI_QUAD_SIZE],
             const int sample[TGSI_QUAD_SIZE],
             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
   struct pipe_image_view *iview;
   struct softpipe_resource *spr;
   unsigned width, height, depth;
   unsigned stride;
   int c, j;
   char *data_ptr;
   unsigned offset = 0;

   /* sp_iview[] has PIPE_MAX_SHADER_IMAGES entries, so the limit itself
    * is already one past the end.
    */
   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
      goto fail_write_all_zero;
   iview = &sp_img->sp_iview[params->unit];
   spr = (struct softpipe_resource *)iview->resource;
   if (!spr)
      goto fail_write_all_zero;

   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
      goto fail_write_all_zero;

   /* do not hand back uninitialised results when validation fails */
   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
                       params->format, &width, &height, &depth))
      goto fail_write_all_zero;

   stride = util_format_get_stride(params->format, width);

   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      int s_coord, t_coord, r_coord;
      bool fill_zero = false;

      if (!(params->execmask & (1 << j)))
         fill_zero = true;

      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
      if (!bounds_check(width, height, depth,
                        s_coord, t_coord, r_coord))
         fill_zero = true;

      if (fill_zero) {
         int nc = util_format_get_nr_components(params->format);
         int ival = util_format_is_pure_integer(params->format);
         for (c = 0; c < 4; c++) {
            rgba[c][j] = 0;
            /* missing fourth channel reads as one (integer or float) */
            if (c == 3 && nc < 4) {
               if (ival)
                  ((int32_t *)rgba[c])[j] = 1;
               else
                  rgba[c][j] = 1.0;
            }
         }
         continue;
      }

      offset = get_image_offset(spr, iview, params->format, r_coord);
      data_ptr = (char *)spr->data + offset;

      if (util_format_is_pure_sint(params->format)) {
         int32_t sdata[4];

         util_format_read_4i(params->format,
                             sdata, 0,
                             data_ptr, stride,
                             s_coord, t_coord, 1, 1);
         for (c = 0; c < 4; c++)
            ((int32_t *)rgba[c])[j] = sdata[c];
      } else if (util_format_is_pure_uint(params->format)) {
         uint32_t sdata[4];

         util_format_read_4ui(params->format,
                              sdata, 0,
                              data_ptr, stride,
                              s_coord, t_coord, 1, 1);
         for (c = 0; c < 4; c++)
            ((uint32_t *)rgba[c])[j] = sdata[c];
      } else {
         float sdata[4];

         util_format_read_4f(params->format,
                             sdata, 0,
                             data_ptr, stride,
                             s_coord, t_coord, 1, 1);
         for (c = 0; c < 4; c++)
            rgba[c][j] = sdata[c];
      }
   }
   return;

fail_write_all_zero:
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      for (c = 0; c < 4; c++)
         rgba[c][j] = 0;
   }
   return;
}
/*
 * Implement the image STORE operation.
 *
 * For each active lane (bit set in params->execmask) whose coordinates are
 * in bounds, write the four channels rgba[c][lane] to the texel addressed
 * by (s, t, r) in the image bound at params->unit.  When params->format is
 * PIPE_FORMAT_NONE the resource's own format is used.  Pure-integer
 * formats take the channel values as raw 32-bit integers.
 *
 * The store is silently dropped if the unit is invalid, nothing is bound,
 * the target is incompatible or the format/size validation fails.
 */
static void
sp_tgsi_store(const struct tgsi_image *image,
              const struct tgsi_image_params *params,
              const int s[TGSI_QUAD_SIZE],
              const int t[TGSI_QUAD_SIZE],
              const int r[TGSI_QUAD_SIZE],
              const int sample[TGSI_QUAD_SIZE],
              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
   struct pipe_image_view *iview;
   struct softpipe_resource *spr;
   unsigned width, height, depth;
   unsigned stride;
   char *data_ptr;
   int j, c;
   unsigned offset = 0;
   unsigned pformat = params->format;

   /* sp_iview[] has PIPE_MAX_SHADER_IMAGES entries, so the limit itself
    * is already one past the end.
    */
   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
      return;
   iview = &sp_img->sp_iview[params->unit];
   spr = (struct softpipe_resource *)iview->resource;
   if (!spr)
      return;
   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
      return;

   /* no explicit format on the instruction: fall back to the resource's */
   if (params->format == PIPE_FORMAT_NONE)
      pformat = spr->base.format;

   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
                       pformat, &width, &height, &depth))
      return;

   stride = util_format_get_stride(pformat, width);

   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      int s_coord, t_coord, r_coord;

      if (!(params->execmask & (1 << j)))
         continue;

      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
      if (!bounds_check(width, height, depth,
                        s_coord, t_coord, r_coord))
         continue;

      offset = get_image_offset(spr, iview, pformat, r_coord);
      data_ptr = (char *)spr->data + offset;

      if (util_format_is_pure_sint(pformat)) {
         int32_t sdata[4];

         for (c = 0; c < 4; c++)
            sdata[c] = ((int32_t *)rgba[c])[j];
         util_format_write_4i(pformat, sdata, 0, data_ptr, stride,
                              s_coord, t_coord, 1, 1);
      } else if (util_format_is_pure_uint(pformat)) {
         uint32_t sdata[4];

         for (c = 0; c < 4; c++)
            sdata[c] = ((uint32_t *)rgba[c])[j];
         util_format_write_4ui(pformat, sdata, 0, data_ptr, stride,
                               s_coord, t_coord, 1, 1);
      } else {
         float sdata[4];

         for (c = 0; c < 4; c++)
            sdata[c] = rgba[c][j];
         util_format_write_4f(pformat, sdata, 0, data_ptr, stride,
                              s_coord, t_coord, 1, 1);
      }
   }
}
/*
 * Read-modify-write helper for image operations on unsigned integer
 * formats, applied to quad lane 'qi' at texel (s, t).
 *
 * The texel is read, each of its components is combined with the operand
 * in rgba[][qi] (and rgba2[][qi] for compare-and-swap) according to
 * 'opcode', the previous texel value is handed back in rgba[][qi], and the
 * new value is written to the image.  With 'just_read' set the texel is
 * only read back into rgba[][qi] and the image is left untouched.
 */
static void
handle_op_uint(const struct pipe_image_view *iview,
               const struct tgsi_image_params *params,
               bool just_read,
               char *data_ptr,
               uint qi,
               unsigned stride,
               unsigned opcode,
               int s,
               int t,
               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
               float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   int num_comps = util_format_get_nr_components(params->format);
   unsigned texel[4];
   uint chan;

   util_format_read_4ui(params->format,
                        texel, 0,
                        data_ptr, stride,
                        s, t, 1, 1);

   if (just_read) {
      for (chan = 0; chan < num_comps; chan++)
         ((uint32_t *)rgba[chan])[qi] = texel[chan];
      return;
   }

   switch (opcode) {
   case TGSI_OPCODE_ATOMUADD:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];

         texel[chan] = old + *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMXCHG:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];

         texel[chan] = *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMCAS:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];
         unsigned expected = *reg;
         unsigned replacement = ((uint32_t *)rgba2[chan])[qi];

         /* swap in the new value only if the texel matches the comparand */
         if (old == expected)
            texel[chan] = replacement;
         else
            texel[chan] = old;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMAND:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];

         texel[chan] = old & *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMOR:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];

         texel[chan] = old | *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMXOR:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];

         texel[chan] = old ^ *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMUMIN:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];
         unsigned operand = *reg;

         texel[chan] = MIN2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMUMAX:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         unsigned old = texel[chan];
         unsigned operand = *reg;

         texel[chan] = MAX2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMIMIN:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         /* signed opcode: compare the values as signed integers */
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MIN2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMIMAX:
      for (chan = 0; chan < num_comps; chan++) {
         uint32_t *reg = &((uint32_t *)rgba[chan])[qi];
         /* signed opcode: compare the values as signed integers */
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MAX2(old, operand);
         *reg = old;
      }
      break;

   default:
      assert(!"Unexpected TGSI opcode in sp_tgsi_op");
      break;
   }

   util_format_write_4ui(params->format, texel, 0, data_ptr, stride,
                         s, t, 1, 1);
}
/*
 * Read-modify-write helper for image operations on signed integer
 * formats, applied to quad lane 'qi' at texel (s, t).
 *
 * The texel is read, each of its components is combined with the operand
 * in rgba[][qi] (and rgba2[][qi] for compare-and-swap) according to
 * 'opcode', the previous texel value is handed back in rgba[][qi], and the
 * new value is written to the image.  With 'just_read' set the texel is
 * only read back into rgba[][qi] and the image is left untouched.
 *
 * All arithmetic and comparisons here are done on signed ints, including
 * for the UMIN/UMAX opcodes.
 */
static void
handle_op_int(const struct pipe_image_view *iview,
              const struct tgsi_image_params *params,
              bool just_read,
              char *data_ptr,
              uint qi,
              unsigned stride,
              unsigned opcode,
              int s,
              int t,
              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
              float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   int num_comps = util_format_get_nr_components(params->format);
   int texel[4];
   uint chan;

   util_format_read_4i(params->format,
                       texel, 0,
                       data_ptr, stride,
                       s, t, 1, 1);

   if (just_read) {
      for (chan = 0; chan < num_comps; chan++)
         ((int32_t *)rgba[chan])[qi] = texel[chan];
      return;
   }

   switch (opcode) {
   case TGSI_OPCODE_ATOMUADD:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];

         texel[chan] = old + *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMXCHG:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];

         texel[chan] = *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMCAS:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];
         int expected = *reg;
         int replacement = ((int32_t *)rgba2[chan])[qi];

         /* swap in the new value only if the texel matches the comparand */
         if (old == expected)
            texel[chan] = replacement;
         else
            texel[chan] = old;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMAND:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];

         texel[chan] = old & *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMOR:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];

         texel[chan] = old | *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMXOR:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];

         texel[chan] = old ^ *reg;
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMUMIN:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MIN2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMUMAX:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MAX2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMIMIN:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MIN2(old, operand);
         *reg = old;
      }
      break;

   case TGSI_OPCODE_ATOMIMAX:
      for (chan = 0; chan < num_comps; chan++) {
         int32_t *reg = &((int32_t *)rgba[chan])[qi];
         int old = texel[chan];
         int operand = *reg;

         texel[chan] = MAX2(old, operand);
         *reg = old;
      }
      break;

   default:
      assert(!"Unexpected TGSI opcode in sp_tgsi_op");
      break;
   }

   util_format_write_4i(params->format, texel, 0, data_ptr, stride,
                        s, t, 1, 1);
}
/*
 * Implement atomic image operations.
 *
 * For each quad lane the texel addressed by (s, t, r) in the image bound
 * at params->unit is combined with the operand in rgba[][lane] (and
 * rgba2[][lane] for compare-and-swap) as selected by 'opcode'; the
 * previous texel value is returned in rgba[][lane].
 *
 * Lanes with out-of-bounds coordinates return zero, with the fourth
 * channel set to one when the format has fewer than four components.
 * Lanes masked out by params->execmask only read the texel back and do not
 * modify the image.
 *
 * If the unit is invalid, nothing is bound, the target is incompatible or
 * the format/size validation fails, every channel of every lane is set to
 * zero.
 */
static void
sp_tgsi_op(const struct tgsi_image *image,
           const struct tgsi_image_params *params,
           unsigned opcode,
           const int s[TGSI_QUAD_SIZE],
           const int t[TGSI_QUAD_SIZE],
           const int r[TGSI_QUAD_SIZE],
           const int sample[TGSI_QUAD_SIZE],
           float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
           float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
   struct pipe_image_view *iview;
   struct softpipe_resource *spr;
   unsigned width, height, depth;
   unsigned stride;
   int j, c;
   unsigned offset;
   char *data_ptr;

   /* sp_iview[] has PIPE_MAX_SHADER_IMAGES entries, so the limit itself
    * is already one past the end.  Treat it like every other failure.
    */
   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
      goto fail_write_all_zero;
   iview = &sp_img->sp_iview[params->unit];
   spr = (struct softpipe_resource *)iview->resource;
   if (!spr)
      goto fail_write_all_zero;
   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
      goto fail_write_all_zero;

   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
                       params->format, &width, &height, &depth))
      goto fail_write_all_zero;

   stride = util_format_get_stride(spr->base.format, width);

   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      int s_coord, t_coord, r_coord;
      bool just_read = false;

      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
      if (!bounds_check(width, height, depth,
                        s_coord, t_coord, r_coord)) {
         int nc = util_format_get_nr_components(params->format);
         int ival = util_format_is_pure_integer(params->format);

         for (c = 0; c < 4; c++) {
            rgba[c][j] = 0;
            /* missing fourth channel reads as one (integer or float) */
            if (c == 3 && nc < 4) {
               if (ival)
                  ((int32_t *)rgba[c])[j] = 1;
               else
                  rgba[c][j] = 1.0;
            }
         }
         continue;
      }

      /* just readback the value for atomic if execmask isn't set */
      if (!(params->execmask & (1 << j))) {
         just_read = true;
      }

      offset = get_image_offset(spr, iview, params->format, r_coord);
      data_ptr = (char *)spr->data + offset;

      /* we should see atomic operations on r32 formats */
      if (util_format_is_pure_uint(params->format))
         handle_op_uint(iview, params, just_read, data_ptr, j, stride,
                        opcode, s_coord, t_coord, rgba, rgba2);
      else if (util_format_is_pure_sint(params->format))
         handle_op_int(iview, params, just_read, data_ptr, j, stride,
                       opcode, s_coord, t_coord, rgba, rgba2);
      else
         assert(0);
   }
   return;

fail_write_all_zero:
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      for (c = 0; c < 4; c++)
         rgba[c][j] = 0;
   }
   return;
}
/*
 * Query the dimensions of the image bound at params->unit.
 *
 * dims[0] is always the width: the element count for buffers, otherwise
 * the width at the view's mip level.  Further entries are filled in per
 * target as shown in the switch below (height, then depth or layer
 * count); entries a target does not use are left untouched, except for
 * buffers where dims[1..3] are set to 0.
 *
 * Nothing is written if the unit is out of range or no resource is bound.
 */
static void
sp_tgsi_get_dims(const struct tgsi_image *image,
                 const struct tgsi_image_params *params,
                 int dims[4])
{
   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
   struct pipe_image_view *iview;
   struct softpipe_resource *spr;
   int level;

   /* sp_iview[] has PIPE_MAX_SHADER_IMAGES entries, so the limit itself
    * is already one past the end.
    */
   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
      return;
   iview = &sp_img->sp_iview[params->unit];
   spr = (struct softpipe_resource *)iview->resource;
   if (!spr)
      return;

   if (params->tgsi_tex_instr == TGSI_TEXTURE_BUFFER) {
      dims[0] = iview->u.buf.last_element - iview->u.buf.first_element + 1;
      dims[1] = dims[2] = dims[3] = 0;
      return;
   }

   level = iview->u.tex.level;
   dims[0] = u_minify(spr->base.width0, level);
   switch (params->tgsi_tex_instr) {
   case TGSI_TEXTURE_1D_ARRAY:
      /* number of layers covered by the view */
      dims[1] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      return;
   case TGSI_TEXTURE_2D_ARRAY:
      dims[2] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_RECT:
      dims[1] = u_minify(spr->base.height0, level);
      return;
   case TGSI_TEXTURE_3D:
      dims[1] = u_minify(spr->base.height0, level);
      dims[2] = u_minify(spr->base.depth0, level);
      return;
   case TGSI_TEXTURE_CUBE_ARRAY:
      dims[1] = u_minify(spr->base.height0, level);
      /* the view's layer range counts faces; report whole cubes */
      dims[2] = (iview->u.tex.last_layer - iview->u.tex.first_layer + 1) / 6;
      break;
   default:
      assert(!"unexpected texture target in sp_get_dims()");
      return;
   }
}
/*
 * Allocate a softpipe image object and install the load/store/op/get_dims
 * callbacks on its tgsi_image base.
 *
 * Returns NULL if the allocation fails.  The image view slots are
 * presumably zeroed by CALLOC_STRUCT -- confirm against its definition.
 */
struct sp_tgsi_image *
sp_create_tgsi_image(void)
{
   struct sp_tgsi_image *img = CALLOC_STRUCT(sp_tgsi_image);
   if (!img)
      return NULL;

   img->base.load = sp_tgsi_load;
   img->base.store = sp_tgsi_store;
   img->base.op = sp_tgsi_op;
   img->base.get_dims = sp_tgsi_get_dims;
   return img;
}
+37
View File
@@ -0,0 +1,37 @@
/*
* Copyright 2016 Red Hat.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SP_IMAGE_H
#define SP_IMAGE_H
#include "tgsi/tgsi_exec.h"
/*
 * Softpipe's implementation of the TGSI image interface: the generic
 * callback table plus the image views currently bound for one shader stage.
 */
struct sp_tgsi_image
{
/* Generic interface; kept as the first member because the softpipe
 * callbacks cast a 'struct tgsi_image *' back to 'struct sp_tgsi_image *'.
 */
struct tgsi_image base;
/* Bound image views, indexed by image unit. */
struct pipe_image_view sp_iview[PIPE_MAX_SHADER_IMAGES];
};
/*
 * Allocate a softpipe image object with its load/store/op/get_dims
 * callbacks installed.  Returns NULL if the allocation fails.
 */
struct sp_tgsi_image *
sp_create_tgsi_image(void);
#endif
@@ -782,7 +782,7 @@ depth_test_quads_fallback(struct quad_stage *qs,
{
unsigned i, pass = 0;
const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
boolean interp_depth = !fsInfo->writes_z;
boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
boolean shader_stencil_ref = fsInfo->writes_stencil;
struct depth_data data;
unsigned vp_idx = quads[0]->input.viewport_index;
@@ -902,7 +902,7 @@ choose_depth_test(struct quad_stage *qs,
{
const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
boolean interp_depth = !fsInfo->writes_z;
boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
+1 -1
View File
@@ -80,7 +80,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
/* run shader */
machine->flatshade_color = softpipe->rasterizer->flatshade ? TRUE : FALSE;
return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad );
return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad, softpipe->early_depth );
}
+4 -2
View File
@@ -43,15 +43,17 @@ void
sp_build_quad_pipeline(struct softpipe_context *sp)
{
boolean early_depth_test =
sp->depth_stencil->depth.enabled &&
(sp->depth_stencil->depth.enabled &&
sp->framebuffer.zsbuf &&
!sp->depth_stencil->alpha.enabled &&
!sp->fs_variant->info.uses_kill &&
!sp->fs_variant->info.writes_z &&
!sp->fs_variant->info.writes_stencil;
!sp->fs_variant->info.writes_stencil) ||
sp->fs_variant->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL];
sp->quad.first = sp->quad.blend;
sp->early_depth = early_depth_test;
if (early_depth_test) {
insert_stage_at_head( sp, sp->quad.shade );
insert_stage_at_head( sp, sp->quad.depth_test );
+8 -2
View File
@@ -56,6 +56,7 @@
struct tgsi_sampler;
struct tgsi_image;
struct tgsi_exec_machine;
struct vertex_info;
@@ -81,11 +82,13 @@ struct sp_fragment_shader_variant
void (*prepare)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *sampler);
struct tgsi_sampler *sampler,
struct tgsi_image *image);
unsigned (*run)(const struct sp_fragment_shader_variant *shader,
struct tgsi_exec_machine *machine,
struct quad_header *quad);
struct quad_header *quad,
bool early_depth_test);
/* Deletes this instance of the object */
void (*delete)(struct sp_fragment_shader_variant *shader,
@@ -148,6 +151,9 @@ softpipe_init_streamout_funcs(struct pipe_context *pipe);
void
softpipe_init_vertex_funcs(struct pipe_context *pipe);
void
softpipe_init_image_funcs(struct pipe_context *pipe);
void
softpipe_set_framebuffer_state(struct pipe_context *,
const struct pipe_framebuffer_state *);
@@ -343,7 +343,8 @@ update_fragment_shader(struct softpipe_context *softpipe, unsigned prim)
softpipe->fs_variant->prepare(softpipe->fs_variant,
softpipe->fs_machine,
(struct tgsi_sampler *) softpipe->
tgsi.sampler[PIPE_SHADER_FRAGMENT]);
tgsi.sampler[PIPE_SHADER_FRAGMENT],
(struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_FRAGMENT]);
}
else {
softpipe->fs_variant = NULL;
@@ -0,0 +1,57 @@
/*
* Copyright 2016 Red Hat.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sp_context.h"
#include "sp_state.h"
#include "sp_image.h"
/*
 * Bind (or unbind) a range of shader image views for one shader stage.
 *
 * Slots start .. start + num - 1 of the stage's sp_iview[] array are
 * updated.  When 'images' is non-NULL, slot start + i takes a reference on
 * images[i].resource and copies the view; when 'images' is NULL, each slot
 * drops its resource reference and is cleared to zero.
 */
static void softpipe_set_shader_images(struct pipe_context *pipe,
                                       unsigned shader,
                                       unsigned start,
                                       unsigned num,
                                       struct pipe_image_view *images)
{
   struct softpipe_context *softpipe = softpipe_context(pipe);
   unsigned i;

   assert(shader < PIPE_SHADER_TYPES);
   /* bound the range by the array that is actually written below */
   assert(start + num <= Elements(softpipe->tgsi.image[shader]->sp_iview));

   /* set the new images */
   for (i = 0; i < num; i++) {
      struct pipe_image_view *slot =
         &softpipe->tgsi.image[shader]->sp_iview[start + i];

      if (images) {
         /* take the reference first; the struct copy then stores the
          * same resource pointer again
          */
         pipe_resource_reference(&slot->resource, images[i].resource);
         *slot = images[i];
      }
      else {
         pipe_resource_reference(&slot->resource, NULL);
         memset(slot, 0, sizeof(struct pipe_image_view));
      }
   }
}
/*
 * Install softpipe's shader-image state setter on the given context.
 */
void
softpipe_init_image_funcs(struct pipe_context *pipe)
{
   pipe->set_shader_images = softpipe_set_shader_images;
}
+4 -4
View File
@@ -270,9 +270,9 @@ softpipe_resource_get_handle(struct pipe_screen *screen,
* Helper function to compute offset (in bytes) for a particular
* texture level/face/slice from the start of the buffer.
*/
static unsigned
sp_get_tex_image_offset(const struct softpipe_resource *spr,
unsigned level, unsigned layer)
unsigned
softpipe_get_tex_image_offset(const struct softpipe_resource *spr,
unsigned level, unsigned layer)
{
unsigned offset = spr->level_offset[level];
@@ -422,7 +422,7 @@ softpipe_transfer_map(struct pipe_context *pipe,
pt->stride = spr->stride[level];
pt->layer_stride = spr->img_stride[level];
spt->offset = sp_get_tex_image_offset(spr, level, box->z);
spt->offset = softpipe_get_tex_image_offset(spr, level, box->z);
spt->offset +=
box->y / util_format_get_blockheight(format) * spt->base.stride +
+3 -1
View File
@@ -116,5 +116,7 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen);
extern void
softpipe_init_texture_funcs(struct pipe_context *pipe);
unsigned
softpipe_get_tex_image_offset(const struct softpipe_resource *spr,
unsigned level, unsigned layer);
#endif /* SP_TEXTURE */
+2 -10
View File
@@ -50,15 +50,6 @@
*/
static char err_buf[128];
#if 0
static void
svga_destroy_shader_emitter(struct svga_shader_emitter *emit)
{
if (emit->buf != err_buf)
FREE(emit->buf);
}
#endif
static boolean
svga_shader_expand(struct svga_shader_emitter *emit)
@@ -265,6 +256,7 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga,
fail:
FREE(variant);
FREE(emit.buf);
if (emit.buf != err_buf)
FREE(emit.buf);
return NULL;
}
+16 -3
View File
@@ -535,7 +535,6 @@ svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
static boolean
ps30_sampler( struct svga_shader_emitter *emit,
struct tgsi_declaration_semantic semantic,
unsigned idx )
{
SVGA3DOpDclArgs dcl;
@@ -553,6 +552,17 @@ ps30_sampler( struct svga_shader_emitter *emit,
svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
}
/*
 * Emit a sampler declaration for every sampler unit below
 * emit->num_samplers.  Stops and returns FALSE as soon as one declaration
 * cannot be emitted; returns TRUE otherwise.
 */
boolean
svga_shader_emit_samplers_decl( struct svga_shader_emitter *emit )
{
   unsigned unit = 0;

   while (unit < emit->num_samplers) {
      if (!ps30_sampler(emit, unit))
         return FALSE;
      unit++;
   }

   return TRUE;
}
boolean
svga_translate_decl_sm30( struct svga_shader_emitter *emit,
@@ -563,12 +573,15 @@ svga_translate_decl_sm30( struct svga_shader_emitter *emit,
unsigned idx;
for( idx = first; idx <= last; idx++ ) {
boolean ok;
boolean ok = TRUE;
switch (decl->Declaration.File) {
case TGSI_FILE_SAMPLER:
assert (emit->unit == PIPE_SHADER_FRAGMENT);
ok = ps30_sampler( emit, decl->Semantic, idx );
/* just keep track of the number of samplers here.
* Will emit the declaration in the helpers function.
*/
emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
break;
case TGSI_FILE_INPUT:
@@ -137,6 +137,7 @@ struct svga_shader_emitter
unsigned pstipple_sampler_unit;
int num_samplers;
uint8_t sampler_target[PIPE_MAX_SAMPLERS];
};
@@ -156,6 +157,9 @@ boolean
svga_shader_emit_instructions(struct svga_shader_emitter *emit,
const struct tgsi_token *tokens);
boolean
svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit);
boolean
svga_translate_decl_sm30(struct svga_shader_emitter *emit,
const struct tgsi_full_declaration *decl);

Some files were not shown because too many files have changed in this diff Show More