diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 17ba18fca30..87e20596716 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1022,6 +1022,7 @@ struct brw_wm_prog_data { * For varying slots that are not used by the FS, the value is -1. */ int urb_setup[VARYING_SLOT_MAX]; + int urb_setup_channel[VARYING_SLOT_MAX]; /** * Cache structure into the urb_setup array above that contains the @@ -1625,6 +1626,7 @@ struct brw_tue_map { struct brw_mue_map { int32_t start_dw[VARYING_SLOT_MAX]; + uint32_t len_dw[VARYING_SLOT_MAX]; uint32_t per_primitive_indices_dw; uint32_t size_dw; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2f2b27c90a4..2f6d33cef7e 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1764,10 +1764,10 @@ calculate_urb_setup(const struct intel_device_info *devinfo, const nir_shader *nir, const struct brw_mue_map *mue_map) { - memset(prog_data->urb_setup, -1, - sizeof(prog_data->urb_setup[0]) * VARYING_SLOT_MAX); + memset(prog_data->urb_setup, -1, sizeof(prog_data->urb_setup)); + memset(prog_data->urb_setup_channel, 0, sizeof(prog_data->urb_setup_channel)); - int urb_next = 0; + int urb_next = 0; /* in vec4s */ const uint64_t inputs_read = nir->info.inputs_read & ~nir->info.per_primitive_inputs; @@ -1782,6 +1782,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo, uint64_t per_prim_inputs_read = nir->info.inputs_read & nir->info.per_primitive_inputs; + unsigned per_prim_start_dw = mue_map->per_primitive_start_dw; + unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw; + /* In Mesh, PRIMITIVE_SHADING_RATE, VIEWPORT and LAYER slots * are always at the beginning, because they come from MUE * Primitive Header, not Per-Primitive Attributes. @@ -1789,8 +1792,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo, const uint64_t primitive_header_bits = VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_SHADING_RATE; + bool reads_header = (per_prim_inputs_read & primitive_header_bits) != 0; - if (per_prim_inputs_read & primitive_header_bits) { + if (reads_header) { /* Primitive Shading Rate, Layer and Viewport live in the same * 4-dwords slot (psr is dword 0, layer is dword 1, and viewport * is dword 2). @@ -1804,23 +1808,30 @@ calculate_urb_setup(const struct intel_device_info *devinfo, if (per_prim_inputs_read & VARYING_BIT_VIEWPORT) prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0; - /* 3DSTATE_SBE_MESH.Per[Primitive|Vertex]URBEntryOutputRead[Offset|Length] - * are in full GRFs (8 dwords) and MUE Primitive Header is 8 dwords, - * so next per-primitive attribute must be placed in slot 2 (each slot - * is 4 dwords long). - */ - urb_next = 2; per_prim_inputs_read &= ~primitive_header_bits; + } else { + /* If fs doesn't need primitive header, then it won't be made + * available through SBE_MESH, so we have to skip them when + * calculating offset from start of per-prim data. + */ + per_prim_start_dw += mue_map->per_primitive_header_size_dw; + per_prim_size_dw -= mue_map->per_primitive_header_size_dw; } - for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) { - if (per_prim_inputs_read & BITFIELD64_BIT(i)) { - prog_data->urb_setup[i] = urb_next++; - } + u_foreach_bit64(i, per_prim_inputs_read) { + int start = mue_map->start_dw[i]; + + assert(start >= 0); + assert(mue_map->len_dw[i] > 0); + + assert(unsigned(start) >= per_prim_start_dw); + unsigned pos_dw = unsigned(start) - per_prim_start_dw; + + prog_data->urb_setup[i] = urb_next + pos_dw / 4; + prog_data->urb_setup_channel[i] = pos_dw % 4; } - /* The actual setup attributes later must be aligned to a full GRF. */ - urb_next = ALIGN(urb_next, 2); + urb_next = per_prim_size_dw / 4; prog_data->num_per_primitive_inputs = urb_next; } @@ -1835,21 +1846,43 @@ calculate_urb_setup(const struct intel_device_info *devinfo, unique_fs_attrs &= ~clip_dist_bits; } + unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw; + unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw; + + /* Per-Vertex header is never available to fragment shader. */ + per_vertex_start_dw += 8; + per_vertex_size_dw -= 8; + /* In Mesh, CLIP_DIST slots are always at the beginning, because * they come from MUE Vertex Header, not Per-Vertex Attributes. */ if (inputs_read & clip_dist_bits) { - prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next++; - prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next++; + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next; + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1; + } else if (mue_map->per_vertex_header_size_dw > 8) { + /* Clip distances are in MUE, but we are not reading them in FS. */ + per_vertex_start_dw += 8; + per_vertex_size_dw -= 8; } /* Per-Vertex attributes are laid out ordered. Because we always link * Mesh and Fragment shaders, the which slots are written and read by * each of them will match. */ - for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { - if (unique_fs_attrs & BITFIELD64_BIT(i)) - prog_data->urb_setup[i] = urb_next++; + + u_foreach_bit64(i, unique_fs_attrs) { + int start = mue_map->start_dw[i]; + + assert(start >= 0); + assert(mue_map->len_dw[i] > 0); + + assert(unsigned(start) >= per_vertex_start_dw); + unsigned pos_dw = unsigned(start) - per_vertex_start_dw; + + prog_data->urb_setup[i] = urb_next + pos_dw / 4; + prog_data->urb_setup_channel[i] = pos_dw % 4; } + + urb_next += per_vertex_size_dw / 4; } else if (devinfo->ver >= 6) { uint64_t vue_header_bits = VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 923d86c2235..fa06fbb50ca 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -438,7 +438,7 @@ public: fs_reg get_timestamp(const brw::fs_builder &bld); fs_reg interp_reg(int location, int channel); - fs_reg per_primitive_reg(int location); + fs_reg per_primitive_reg(int location, unsigned comp); virtual void dump_instruction_to_file(const backend_instruction *inst, FILE *file) const; virtual void dump_instructions_to_file(FILE *file) const; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 1d26edd216d..8367ef9b26d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3489,7 +3489,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, assert(base != VARYING_SLOT_PRIMITIVE_INDICES); for (unsigned int i = 0; i < num_components; i++) { bld.MOV(offset(dest, bld, i), - retype(component(per_primitive_reg(base), comp + i), dest.type)); + retype(per_primitive_reg(base, comp + i), dest.type)); } } else { for (unsigned int i = 0; i < num_components; i++) { diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 3cf29c7b48b..f713e9feb56 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -126,6 +126,7 @@ fs_visitor::interp_reg(int location, int channel) assert(prog_data->urb_setup[location] >= 0); unsigned nr = prog_data->urb_setup[location]; + channel += prog_data->urb_setup_channel[location]; /* Adjust so we start counting from the first per_vertex input. */ assert(nr >= prog_data->num_per_primitive_inputs); @@ -142,19 +143,22 @@ fs_visitor::interp_reg(int location, int channel) * generate_code() time. */ fs_reg -fs_visitor::per_primitive_reg(int location) +fs_visitor::per_primitive_reg(int location, unsigned comp) { assert(stage == MESA_SHADER_FRAGMENT); assert(BITFIELD64_BIT(location) & nir->info.per_primitive_inputs); const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); + comp += prog_data->urb_setup_channel[location]; + assert(prog_data->urb_setup[location] >= 0); - const unsigned regnr = prog_data->urb_setup[location]; + const unsigned regnr = prog_data->urb_setup[location] + comp / 4; + assert(regnr < prog_data->num_per_primitive_inputs); - return fs_reg(ATTR, regnr, BRW_REGISTER_TYPE_F); + return component(fs_reg(ATTR, regnr, BRW_REGISTER_TYPE_F), comp % 4); } /** Emits the interpolation for the varying inputs. */ diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 04ef760d060..cf4b589341e 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -21,6 +21,8 @@ * IN THE SOFTWARE. */ +#include +#include #include "brw_compiler.h" #include "brw_fs.h" #include "brw_nir.h" @@ -414,6 +416,224 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map) nir_address_format_32bit_offset); } +/* Attribute types. Flat attributes have to be a separate class because + * flat and interpolated attributes can't share the same vec4 slot + * (see 3DSTATE_SBE.ConstantInterpolationEnable). + */ +enum { + PRIM, /* per primitive */ + VERT, /* per vertex interpolated */ + VERT_FLAT, /* per vertex flat */ +}; + +struct attr_desc { + int location; + const struct glsl_type *type; + unsigned dwords; + unsigned slots; +}; + +struct attr_type_info { + /* order of attributes, negative values are holes */ + std::list *order; + + /* attributes after which there's hole of size equal to array index */ + std::list holes[4]; +}; + +static void +brw_mue_assign_position(const struct attr_desc *attr, + struct brw_mue_map *map, + unsigned start_dw) +{ + bool is_array = glsl_type_is_array(attr->type); + int location = attr->location; + unsigned remaining = attr->dwords; + + for (unsigned slot = 0; slot < attr->slots; ++slot) { + map->start_dw[location + slot] = start_dw; + + unsigned sz; + + if (is_array) { + assert(attr->dwords % attr->slots == 0); + sz = attr->dwords / attr->slots; + } else { + sz = MIN2(remaining, 4); + } + + map->len_dw[location + slot] = sz; + start_dw += sz; + remaining -= sz; + } +} + +static nir_variable * +brw_nir_find_complete_variable_with_location(nir_shader *shader, + nir_variable_mode mode, + int location) +{ + nir_variable *best_var = NULL; + unsigned last_size = 0; + + nir_foreach_variable_with_modes(var, shader, mode) { + if (var->data.location != location) + continue; + + unsigned new_size = glsl_count_dword_slots(var->type, false); + if (new_size > last_size) { + best_var = var; + last_size = new_size; + } + } + + return best_var; +} + +/* Finds order of outputs which require minimum size, without splitting + * of URB read/write messages (which operate on vec4-aligned memory). + */ +static void +brw_compute_mue_layout(std::list *orders, + uint64_t outputs_written, + struct nir_shader *nir) +{ + const struct shader_info *info = &nir->info; + + struct attr_type_info data[3]; + bool no_compact = !debug_get_bool_option("BRW_MESH_COMPACTION", true); + + for (unsigned i = PRIM; i <= VERT_FLAT; ++i) + data[i].order = &orders[i]; + + u_foreach_bit64(location, outputs_written) { + if ((BITFIELD64_BIT(location) & outputs_written) == 0) + continue; + + /* At this point there are both complete and split variables as + * outputs. We need the complete variable to compute the required + * size. + */ + nir_variable *var = + brw_nir_find_complete_variable_with_location(nir, + nir_var_shader_out, + location); + + struct attr_desc d; + d.location = location; + d.type = brw_nir_get_var_type(nir, var); + d.dwords = glsl_count_dword_slots(d.type, false); + d.slots = glsl_count_attribute_slots(d.type, false); + + struct attr_type_info *type_data; + + if (BITFIELD64_BIT(location) & info->per_primitive_outputs) + type_data = &data[PRIM]; + else if (var->data.interpolation == INTERP_MODE_FLAT) + type_data = &data[VERT_FLAT]; + else + type_data = &data[VERT]; + + std::list *order = type_data->order; + std::list *holes = type_data->holes; + + outputs_written &= ~BITFIELD64_RANGE(location, d.slots); + + int mod = d.dwords % 4; + if (mod == 0) { + order->push_back(d); + continue; + } + + struct attr_desc h; + h.location = -1; + h.type = NULL; + h.dwords = 4 - mod; + h.slots = 0; + + if (no_compact) { + order->push_back(d); + order->push_back(h); + continue; + } + + if (d.dwords > 4) { + order->push_back(d); + order->push_back(h); + holes[h.dwords].push_back(location); + continue; + } + + assert(d.dwords < 4); + + unsigned found = 0; + /* try to find the smallest hole big enough to hold this attribute */ + for (unsigned sz = d.dwords; sz < 4; sz++){ + if (!holes[sz].empty()) { + found = sz; + break; + } + } + + /* append at the end if not found */ + if (found == 0) { + order->push_back(d); + order->push_back(h); + holes[h.dwords].push_back(location); + + continue; + } + + assert(found < 4); + assert(!holes[found].empty()); + int after_loc = holes[found].back(); + holes[found].pop_back(); + + bool inserted_back = false; + + for (auto it = order->begin(); it != order->end(); ++it) { + if ((*it).location != after_loc) + continue; + + ++it; + /* must be a hole */ + assert((*it).location < 0); + /* and it must be big enough */ + assert(d.dwords <= (*it).dwords); + + if (d.dwords == (*it).dwords) { + /* exact size, just replace */ + *it = d; + } else { + /* inexact size, shrink hole */ + (*it).dwords -= d.dwords; + /* and insert new attribute before it */ + order->insert(it, d); + + /* Insert shrunk hole in a spot so that the order of attributes + * is preserved. + */ + std::list &hole_list = holes[(*it).dwords]; + std::list::iterator insert_before = hole_list.end(); + + for (auto it2 = hole_list.begin(); it2 != hole_list.end(); ++it2) { + if ((*it2) >= (int)location) { + insert_before = it2; + break; + } + } + + hole_list.insert(insert_before, location); + } + + inserted_back = true; + break; + } + + assert(inserted_back); + } +} + /* Mesh URB Entry consists of an initial section * * - Primitive Count @@ -443,8 +663,8 @@ brw_compute_mue_map(struct nir_shader *nir, struct brw_mue_map *map, { memset(map, 0, sizeof(*map)); - for (int i = 0; i < VARYING_SLOT_MAX; i++) - map->start_dw[i] = -1; + memset(&map->start_dw[0], -1, sizeof(map->start_dw)); + memset(&map->len_dw[0], 0, sizeof(map->len_dw)); unsigned vertices_per_primitive = num_mesh_vertices_per_primitive(nir->info.mesh.primitive_type); @@ -454,16 +674,6 @@ brw_compute_mue_map(struct nir_shader *nir, struct brw_mue_map *map, uint64_t outputs_written = nir->info.outputs_written; - /* Assign initial section. */ - if (BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) & outputs_written) { - map->start_dw[VARYING_SLOT_PRIMITIVE_COUNT] = 0; - outputs_written &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT); - } - if (BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) & outputs_written) { - map->start_dw[VARYING_SLOT_PRIMITIVE_INDICES] = 1; - outputs_written &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES); - } - /* One dword for primitives count then K extra dwords for each primitive. */ switch (index_format) { case BRW_INDEX_FORMAT_U32: @@ -479,86 +689,157 @@ brw_compute_mue_map(struct nir_shader *nir, struct brw_mue_map *map, map->per_primitive_start_dw = ALIGN(map->per_primitive_indices_dw * map->max_primitives + 1, 8); - /* TODO(mesh): Multiview. */ - map->per_primitive_header_size_dw = - (nir->info.outputs_written & (BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) | - BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE) | - BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE) | - BITFIELD64_BIT(VARYING_SLOT_LAYER))) ? 8 : 0; + /* Assign initial section. */ + if (BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) & outputs_written) { + map->start_dw[VARYING_SLOT_PRIMITIVE_COUNT] = 0; + map->len_dw[VARYING_SLOT_PRIMITIVE_COUNT] = 1; + outputs_written &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT); + } + if (BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) & outputs_written) { + map->start_dw[VARYING_SLOT_PRIMITIVE_INDICES] = 1; + map->len_dw[VARYING_SLOT_PRIMITIVE_INDICES] = + map->per_primitive_indices_dw * map->max_primitives; + outputs_written &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES); + } - map->per_primitive_data_size_dw = 0; - u_foreach_bit64(location, outputs_written & nir->info.per_primitive_outputs) { - assert(map->start_dw[location] == -1); + const uint64_t per_primitive_header_bits = + BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE) | + BITFIELD64_BIT(VARYING_SLOT_LAYER) | + BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) | + BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE); - unsigned start; - switch (location) { - case VARYING_SLOT_PRIMITIVE_SHADING_RATE: - start = map->per_primitive_start_dw + 0; - break; - case VARYING_SLOT_LAYER: - start = map->per_primitive_start_dw + 1; /* RTAIndex */ - break; - case VARYING_SLOT_VIEWPORT: - start = map->per_primitive_start_dw + 2; - break; - case VARYING_SLOT_CULL_PRIMITIVE: - start = map->per_primitive_start_dw + 3; - break; - default: - assert(location == VARYING_SLOT_PRIMITIVE_ID || - location >= VARYING_SLOT_VAR0); - start = map->per_primitive_start_dw + - map->per_primitive_header_size_dw + - map->per_primitive_data_size_dw; - map->per_primitive_data_size_dw += 4; - break; + const uint64_t per_vertex_header_bits = + BITFIELD64_BIT(VARYING_SLOT_PSIZ) | + BITFIELD64_BIT(VARYING_SLOT_POS) | + BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) | + BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + + std::list orders[3]; + uint64_t regular_outputs = outputs_written & + ~(per_primitive_header_bits | per_vertex_header_bits); + brw_compute_mue_layout(orders, regular_outputs, nir); + + if (outputs_written & per_primitive_header_bits) { + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) { + map->start_dw[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = + map->per_primitive_start_dw + 0; + map->len_dw[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 1; } - map->start_dw[location] = start; + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_LAYER)) { + map->start_dw[VARYING_SLOT_LAYER] = + map->per_primitive_start_dw + 1; /* RTAIndex */ + map->len_dw[VARYING_SLOT_LAYER] = 1; + } + + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)) { + map->start_dw[VARYING_SLOT_VIEWPORT] = + map->per_primitive_start_dw + 2; + map->len_dw[VARYING_SLOT_VIEWPORT] = 1; + } + + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE)) { + map->start_dw[VARYING_SLOT_CULL_PRIMITIVE] = + map->per_primitive_start_dw + 3; + map->len_dw[VARYING_SLOT_CULL_PRIMITIVE] = 1; + } + + map->per_primitive_header_size_dw = 8; + outputs_written &= ~per_primitive_header_bits; + } else { + map->per_primitive_header_size_dw = 0; + } + + map->per_primitive_data_size_dw = 0; + + unsigned start_dw = map->per_primitive_start_dw + + map->per_primitive_header_size_dw; + for (auto it = orders[PRIM].cbegin(); it != orders[PRIM].cend(); ++it) { + int location = (*it).location; + if (location < 0) { + start_dw += (*it).dwords; + map->per_primitive_data_size_dw += (*it).dwords; + continue; + } + + assert(map->start_dw[location] == -1); + + assert(location == VARYING_SLOT_PRIMITIVE_ID || + location >= VARYING_SLOT_VAR0); + + brw_mue_assign_position(&*it, map, start_dw); + + start_dw += (*it).dwords; + map->per_primitive_data_size_dw += (*it).dwords; + outputs_written &= ~BITFIELD64_RANGE(location, (*it).slots); } map->per_primitive_pitch_dw = ALIGN(map->per_primitive_header_size_dw + map->per_primitive_data_size_dw, 8); map->per_vertex_start_dw = ALIGN(map->per_primitive_start_dw + - map->per_primitive_pitch_dw * map->max_primitives, 8); + map->per_primitive_pitch_dw * + map->max_primitives, 8); /* TODO(mesh): Multiview. */ unsigned fixed_header_size = 8; map->per_vertex_header_size_dw = ALIGN(fixed_header_size + nir->info.clip_distance_array_size + nir->info.cull_distance_array_size, 8); - map->per_vertex_data_size_dw = 0; - u_foreach_bit64(location, outputs_written & ~nir->info.per_primitive_outputs) { - assert(map->start_dw[location] == -1); - unsigned start; - switch (location) { - case VARYING_SLOT_PSIZ: - start = map->per_vertex_start_dw + 3; - break; - case VARYING_SLOT_POS: - start = map->per_vertex_start_dw + 4; - break; - case VARYING_SLOT_CLIP_DIST0: - start = map->per_vertex_start_dw + fixed_header_size + 0; - break; - case VARYING_SLOT_CLIP_DIST1: - start = map->per_vertex_start_dw + fixed_header_size + 4; - break; - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_CULL_DIST1: - unreachable("cull distances should be lowered earlier"); - break; - default: - assert(location >= VARYING_SLOT_VAR0); - start = map->per_vertex_start_dw + - map->per_vertex_header_size_dw + - map->per_vertex_data_size_dw; - map->per_vertex_data_size_dw += 4; - break; + if (outputs_written & per_vertex_header_bits) { + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_PSIZ)) { + map->start_dw[VARYING_SLOT_PSIZ] = map->per_vertex_start_dw + 3; + map->len_dw[VARYING_SLOT_PSIZ] = 1; + } + + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_POS)) { + map->start_dw[VARYING_SLOT_POS] = map->per_vertex_start_dw + 4; + map->len_dw[VARYING_SLOT_POS] = 4; + } + + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) { + map->start_dw[VARYING_SLOT_CLIP_DIST0] = + map->per_vertex_start_dw + fixed_header_size + 0; + map->len_dw[VARYING_SLOT_CLIP_DIST0] = 4; + } + + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) { + map->start_dw[VARYING_SLOT_CLIP_DIST1] = + map->per_vertex_start_dw + fixed_header_size + 4; + map->len_dw[VARYING_SLOT_CLIP_DIST1] = 4; + } + + outputs_written &= ~per_vertex_header_bits; + } + + /* cull distances should be lowered earlier */ + assert(!(outputs_written & BITFIELD64_BIT(VARYING_SLOT_CULL_DIST0))); + assert(!(outputs_written & BITFIELD64_BIT(VARYING_SLOT_CULL_DIST1))); + + map->per_vertex_data_size_dw = 0; + + start_dw = map->per_vertex_start_dw + + map->per_vertex_header_size_dw; + for (unsigned type = VERT; type <= VERT_FLAT; ++type) { + for (auto it = orders[type].cbegin(); it != orders[type].cend(); ++it) { + int location = (*it).location; + if (location < 0) { + start_dw += (*it).dwords; + map->per_vertex_data_size_dw += (*it).dwords; + continue; + } + + assert(map->start_dw[location] == -1); + + assert(location >= VARYING_SLOT_VAR0); + + brw_mue_assign_position(&*it, map, start_dw); + + start_dw += (*it).dwords; + map->per_vertex_data_size_dw += (*it).dwords; + outputs_written &= ~BITFIELD64_RANGE(location, (*it).slots); } - map->start_dw[location] = start; } map->per_vertex_pitch_dw = ALIGN(map->per_vertex_header_size_dw + @@ -571,14 +852,18 @@ brw_compute_mue_map(struct nir_shader *nir, struct brw_mue_map *map, } static void -brw_print_mue_map(FILE *fp, const struct brw_mue_map *map) +brw_print_mue_map(FILE *fp, const struct brw_mue_map *map, struct nir_shader *nir) { fprintf(fp, "MUE map (%d dwords, %d primitives, %d vertices)\n", map->size_dw, map->max_primitives, map->max_vertices); - fprintf(fp, " %4d: VARYING_SLOT_PRIMITIVE_COUNT\n", - map->start_dw[VARYING_SLOT_PRIMITIVE_COUNT]); - fprintf(fp, " %4d: VARYING_SLOT_PRIMITIVE_INDICES\n", - map->start_dw[VARYING_SLOT_PRIMITIVE_INDICES]); + fprintf(fp, " <%4d, %4d>: VARYING_SLOT_PRIMITIVE_COUNT\n", + map->start_dw[VARYING_SLOT_PRIMITIVE_COUNT], + map->start_dw[VARYING_SLOT_PRIMITIVE_COUNT] + + map->len_dw[VARYING_SLOT_PRIMITIVE_COUNT] - 1); + fprintf(fp, " <%4d, %4d>: VARYING_SLOT_PRIMITIVE_INDICES\n", + map->start_dw[VARYING_SLOT_PRIMITIVE_INDICES], + map->start_dw[VARYING_SLOT_PRIMITIVE_INDICES] + + map->len_dw[VARYING_SLOT_PRIMITIVE_INDICES] - 1); fprintf(fp, " ----- per primitive (start %d, header_size %d, data_size %d, pitch %d)\n", map->per_primitive_start_dw, @@ -589,13 +874,20 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map) for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) { if (map->start_dw[i] < 0) continue; + const unsigned offset = map->start_dw[i]; - if (offset >= map->per_primitive_start_dw && - offset < map->per_primitive_start_dw + map->per_primitive_pitch_dw) { - fprintf(fp, " %4d: %s\n", offset, - gl_varying_slot_name_for_stage((gl_varying_slot)i, - MESA_SHADER_MESH)); - } + const unsigned len = map->len_dw[i]; + + if (offset < map->per_primitive_start_dw || + offset >= map->per_primitive_start_dw + map->per_primitive_pitch_dw) + continue; + + const char *name = + gl_varying_slot_name_for_stage((gl_varying_slot)i, + MESA_SHADER_MESH); + + fprintf(fp, " <%4d, %4d>: %s (%d)\n", offset, offset + len - 1, + name, i); } fprintf(fp, " ----- per vertex (start %d, header_size %d, data_size %d, pitch %d)\n", @@ -607,13 +899,24 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map) for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) { if (map->start_dw[i] < 0) continue; + const unsigned offset = map->start_dw[i]; - if (offset >= map->per_vertex_start_dw && - offset < map->per_vertex_start_dw + map->per_vertex_pitch_dw) { - fprintf(fp, " %4d: %s\n", offset, - gl_varying_slot_name_for_stage((gl_varying_slot)i, - MESA_SHADER_MESH)); - } + const unsigned len = map->len_dw[i]; + + if (offset < map->per_vertex_start_dw || + offset >= map->per_vertex_start_dw + map->per_vertex_pitch_dw) + continue; + + nir_variable *var = + nir_find_variable_with_location(nir, nir_var_shader_out, i); + bool flat = var->data.interpolation == INTERP_MODE_FLAT; + + const char *name = + gl_varying_slot_name_for_stage((gl_varying_slot)i, + MESA_SHADER_MESH); + + fprintf(fp, " <%4d, %4d>: %s (%d)%s\n", offset, offset + len - 1, + name, i, flat ? " (flat)" : ""); } fprintf(fp, "\n"); @@ -1070,7 +1373,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, brw_print_tue_map(stderr, params->tue_map); } fprintf(stderr, "Mesh Output "); - brw_print_mue_map(stderr, &prog_data->map); + brw_print_mue_map(stderr, &prog_data->map, nir); } fs_generator g(compiler, ¶ms->base, &prog_data->base.base, diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 11c2c1259c3..5b459985af3 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2085,6 +2085,21 @@ brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load_uniform, return sysval; } +const struct glsl_type * +brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) +{ + const struct glsl_type *type = var->interface_type; + if (!type) { + type = var->type; + if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); + } + } + + return type; +} + bool brw_nir_pulls_at_sample(nir_shader *shader) { diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 84d1ee081d3..88966e24caf 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -283,6 +283,9 @@ nir_ssa_def *brw_nir_load_global_const(nir_builder *b, nir_ssa_def *base_addr, unsigned off); +const struct glsl_type *brw_nir_get_var_type(const struct nir_shader *nir, + nir_variable *var); + #ifdef __cplusplus } #endif