From f94087be2c14cd1b59bef99235ecc0d47e198b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Fri, 30 Aug 2024 15:50:39 +0200 Subject: [PATCH] r300/compiler: reformat using default mesa .clang-format rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most notably switch from tabs to 3 spaces. Signed-off-by: Pavel Ondračka Acked-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/memory_pool.c | 85 +- .../drivers/r300/compiler/memory_pool.h | 43 +- src/gallium/drivers/r300/compiler/nir_to_rc.c | 456 ++-- src/gallium/drivers/r300/compiler/nir_to_rc.h | 3 +- .../drivers/r300/compiler/r300_fragprog.c | 501 ++-- .../drivers/r300/compiler/r300_fragprog.h | 1 - .../r300/compiler/r300_fragprog_emit.c | 771 +++--- .../r300/compiler/r300_fragprog_swizzle.c | 281 +- src/gallium/drivers/r300/compiler/r300_nir.c | 46 +- src/gallium/drivers/r300/compiler/r300_nir.h | 27 +- .../drivers/r300/compiler/r3xx_fragprog.c | 211 +- .../drivers/r300/compiler/r3xx_vertprog.c | 1305 +++++----- .../r300/compiler/r3xx_vertprog_dump.c | 335 +-- .../drivers/r300/compiler/r500_fragprog.c | 838 +++--- .../drivers/r300/compiler/r500_fragprog.h | 4 +- .../r300/compiler/r500_fragprog_emit.c | 987 ++++--- .../r300/compiler/r500_nir_lower_fcsel.c | 39 +- .../drivers/r300/compiler/radeon_code.c | 260 +- .../drivers/r300/compiler/radeon_code.h | 342 ++- .../drivers/r300/compiler/radeon_compiler.c | 740 +++--- .../drivers/r300/compiler/radeon_compiler.h | 163 +- .../r300/compiler/radeon_compiler_util.c | 983 ++++--- .../r300/compiler/radeon_compiler_util.h | 79 +- .../drivers/r300/compiler/radeon_dataflow.c | 1200 ++++----- .../drivers/r300/compiler/radeon_dataflow.h | 122 +- .../r300/compiler/radeon_dataflow_deadcode.c | 488 ++-- .../r300/compiler/radeon_dataflow_swizzles.c | 988 ++++--- .../r300/compiler/radeon_inline_literals.c | 211 +- .../drivers/r300/compiler/radeon_list.c | 82 +- .../drivers/r300/compiler/radeon_list.h | 17 +- 
.../drivers/r300/compiler/radeon_opcodes.c | 984 ++++--- .../drivers/r300/compiler/radeon_opcodes.h | 280 +- .../drivers/r300/compiler/radeon_optimize.c | 2269 ++++++++--------- .../r300/compiler/radeon_pair_dead_sources.c | 83 +- .../r300/compiler/radeon_pair_regalloc.c | 596 +++-- .../r300/compiler/radeon_pair_schedule.c | 2032 ++++++++------- .../r300/compiler/radeon_pair_translate.c | 589 +++-- .../drivers/r300/compiler/radeon_program.c | 137 +- .../drivers/r300/compiler/radeon_program.h | 200 +- .../r300/compiler/radeon_program_alu.c | 537 ++-- .../r300/compiler/radeon_program_alu.h | 24 +- .../r300/compiler/radeon_program_constants.h | 235 +- .../r300/compiler/radeon_program_pair.c | 331 ++- .../r300/compiler/radeon_program_pair.h | 78 +- .../r300/compiler/radeon_program_print.c | 780 +++--- .../r300/compiler/radeon_program_tex.c | 702 +++-- .../r300/compiler/radeon_program_tex.h | 5 +- .../drivers/r300/compiler/radeon_regalloc.c | 825 +++--- .../drivers/r300/compiler/radeon_regalloc.h | 129 +- .../r300/compiler/radeon_remove_constants.c | 459 ++-- .../r300/compiler/radeon_rename_regs.c | 53 +- .../drivers/r300/compiler/radeon_swizzle.h | 28 +- .../drivers/r300/compiler/radeon_variable.c | 894 +++---- .../drivers/r300/compiler/radeon_variable.h | 65 +- .../drivers/r300/compiler/radeon_vert_fc.c | 395 ++- 55 files changed, 11838 insertions(+), 12480 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c index a873b46a4d0..44fe1bb0789 100644 --- a/src/gallium/drivers/r300/compiler/memory_pool.c +++ b/src/gallium/drivers/r300/compiler/memory_pool.c @@ -9,72 +9,71 @@ #include #include - #define POOL_LARGE_ALLOC 4096 -#define POOL_ALIGN 8 - +#define POOL_ALIGN 8 struct memory_block { - struct memory_block * next; + struct memory_block *next; }; -void memory_pool_init(struct memory_pool * pool) +void +memory_pool_init(struct memory_pool *pool) { - memset(pool, 0, sizeof(struct memory_pool)); + 
memset(pool, 0, sizeof(struct memory_pool)); } - -void memory_pool_destroy(struct memory_pool * pool) +void +memory_pool_destroy(struct memory_pool *pool) { - while(pool->blocks) { - struct memory_block * block = pool->blocks; - pool->blocks = block->next; - free(block); - } + while (pool->blocks) { + struct memory_block *block = pool->blocks; + pool->blocks = block->next; + free(block); + } } -static void refill_pool(struct memory_pool * pool) +static void +refill_pool(struct memory_pool *pool) { - unsigned int blocksize = pool->total_allocated; - struct memory_block * newblock; + unsigned int blocksize = pool->total_allocated; + struct memory_block *newblock; - if (!blocksize) - blocksize = 2*POOL_LARGE_ALLOC; + if (!blocksize) + blocksize = 2 * POOL_LARGE_ALLOC; - newblock = malloc(blocksize); - newblock->next = pool->blocks; - pool->blocks = newblock; + newblock = malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; - pool->head = (unsigned char*)(newblock + 1); - pool->end = ((unsigned char*)newblock) + blocksize; - pool->total_allocated += blocksize; + pool->head = (unsigned char *)(newblock + 1); + pool->end = ((unsigned char *)newblock) + blocksize; + pool->total_allocated += blocksize; } - -void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +void * +memory_pool_malloc(struct memory_pool *pool, unsigned int bytes) { - if (bytes < POOL_LARGE_ALLOC) { - void * ptr; + if (bytes < POOL_LARGE_ALLOC) { + void *ptr; - if (pool->head + bytes > pool->end) - refill_pool(pool); + if (pool->head + bytes > pool->end) + refill_pool(pool); - assert(pool->head + bytes <= pool->end); + assert(pool->head + bytes <= pool->end); - ptr = pool->head; + ptr = pool->head; - pool->head += bytes; - pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + pool->head += bytes; + pool->head = + (unsigned char *)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); - return ptr; - } 
else { - struct memory_block * block = malloc(bytes + sizeof(struct memory_block)); + return ptr; + } else { + struct memory_block *block = malloc(bytes + sizeof(struct memory_block)); - block->next = pool->blocks; - pool->blocks = block; + block->next = pool->blocks; + pool->blocks = block; - return (block + 1); - } + return (block + 1); + } } - - diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h index 456d54f986f..4e57a4974c7 100644 --- a/src/gallium/drivers/r300/compiler/memory_pool.h +++ b/src/gallium/drivers/r300/compiler/memory_pool.h @@ -18,17 +18,15 @@ struct memory_block; * reference counting headaches. */ struct memory_pool { - unsigned char * head; - unsigned char * end; - unsigned int total_allocated; - struct memory_block * blocks; + unsigned char *head; + unsigned char *end; + unsigned int total_allocated; + struct memory_block *blocks; }; - -void memory_pool_init(struct memory_pool * pool); -void memory_pool_destroy(struct memory_pool * pool); -void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); - +void memory_pool_init(struct memory_pool *pool); +void memory_pool_destroy(struct memory_pool *pool); +void *memory_pool_malloc(struct memory_pool *pool, unsigned int bytes); /** * Generic helper for growing an array that has separate size/count @@ -46,18 +44,19 @@ void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); * \warning Array, Size, Reserved have to be lvalues and may be evaluated * several times. 
*/ -#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ - unsigned int _num = (num); \ - if ((size) + _num > (reserved)) { \ - unsigned int newreserve = (reserved) * 2; \ - type * newarray; \ - if (newreserve < _num) \ - newreserve = 4 * _num; /* arbitrary heuristic */ \ - newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ - memcpy(newarray, (array), (size) * sizeof(type)); \ - (array) = newarray; \ - (reserved) = newreserve; \ - } \ -} while(0) +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) \ + do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type *newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ + } while (0) #endif /* MEMORY_POOL_H */ diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 1b8b39df6bf..ade4a626cf6 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -3,13 +3,11 @@ * SPDX-License-Identifier: MIT */ +#include "nir_to_rc.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_deref.h" #include "compiler/nir/nir_legacy.h" #include "compiler/nir/nir_worklist.h" -#include "nir_to_rc.h" -#include "r300_nir.h" -#include "r300_screen.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "tgsi/tgsi_dump.h" @@ -19,9 +17,11 @@ #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_util.h" #include "util/u_debug.h" +#include "util/u_dynarray.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_dynarray.h" +#include "r300_nir.h" +#include "r300_screen.h" struct ntr_insn { enum tgsi_opcode opcode; @@ -110,79 +110,67 @@ static void 
ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list); static void ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list); static struct ntr_insn * -ntr_insn(struct ntr_compile *c, enum tgsi_opcode opcode, - struct ureg_dst dst, - struct ureg_src src0, struct ureg_src src1, - struct ureg_src src2, struct ureg_src src3) +ntr_insn(struct ntr_compile *c, enum tgsi_opcode opcode, struct ureg_dst dst, struct ureg_src src0, + struct ureg_src src1, struct ureg_src src2, struct ureg_src src3) { struct ntr_insn insn = { .opcode = opcode, - .dst = { dst, ureg_dst_undef() }, - .src = { src0, src1, src2, src3 }, + .dst = {dst, ureg_dst_undef()}, + .src = {src0, src1, src2, src3}, .precise = c->precise, }; util_dynarray_append(&c->cur_block->insns, struct ntr_insn, insn); return util_dynarray_top_ptr(&c->cur_block->insns, struct ntr_insn); } -#define OP00( op ) \ -static inline void ntr_##op(struct ntr_compile *c) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \ -} +#define OP00(op) \ + static inline void ntr_##op(struct ntr_compile *c) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(), \ + ureg_src_undef(), ureg_src_undef()); \ + } -#define OP01( op ) \ -static inline void ntr_##op(struct ntr_compile *c, \ - struct ureg_src src0) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \ -} +#define OP01(op) \ + static inline void ntr_##op(struct ntr_compile *c, struct ureg_src src0) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(), \ + ureg_src_undef()); \ + } +#define OP10(op) \ + static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), \ + ureg_src_undef()); \ + } -#define OP10( op ) \ -static inline void 
ntr_##op(struct ntr_compile *c, \ - struct ureg_dst dst) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \ -} +#define OP11(op) \ + static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(), \ + ureg_src_undef()); \ + } -#define OP11( op ) \ -static inline void ntr_##op(struct ntr_compile *c, \ - struct ureg_dst dst, \ - struct ureg_src src0) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \ -} +#define OP12(op) \ + static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0, \ + struct ureg_src src1) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef()); \ + } -#define OP12( op ) \ -static inline void ntr_##op(struct ntr_compile *c, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef()); \ -} +#define OP13(op) \ + static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0, \ + struct ureg_src src1, struct ureg_src src2) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef()); \ + } -#define OP13( op ) \ -static inline void ntr_##op(struct ntr_compile *c, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef()); \ -} - -#define OP14( op ) \ -static inline void ntr_##op(struct ntr_compile *c, \ - struct ureg_dst dst, \ - struct ureg_src src0, \ - struct ureg_src src1, \ - struct ureg_src src2, \ - struct ureg_src src3) \ -{ \ - ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3); \ -} +#define OP14(op) \ + static inline void ntr_##op(struct ntr_compile 
*c, struct ureg_dst dst, struct ureg_src src0, \ + struct ureg_src src1, struct ureg_src src2, struct ureg_src src3) \ + { \ + ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3); \ + } /* We hand-craft our tex instructions */ #define OP12_TEX(op) @@ -235,17 +223,17 @@ struct ntr_live_reg_state { }; static void -ntr_live_reg_mark_use(struct ntr_compile *c, struct ntr_live_reg_block_state *bs, - int ip, unsigned index, unsigned used_mask) +ntr_live_reg_mark_use(struct ntr_compile *c, struct ntr_live_reg_block_state *bs, int ip, + unsigned index, unsigned used_mask) { bs->use[index] |= used_mask & ~bs->def[index]; c->liveness[index].start = MIN2(c->liveness[index].start, ip); c->liveness[index].end = MAX2(c->liveness[index].end, ip); - } static void -ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl, struct ntr_live_reg_state *state) +ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl, + struct ntr_live_reg_state *state) { for (int i = 0; i < impl->num_blocks; i++) { state->blocks[i].def = rzalloc_array(state->blocks, uint8_t, c->num_temps); @@ -257,15 +245,14 @@ ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl, struc } int ip = 0; - nir_foreach_block(block, impl) { + nir_foreach_block (block, impl) { struct ntr_live_reg_block_state *bs = &state->blocks[block->index]; struct ntr_block *ntr_block = ntr_block_from_nir(c, block); ntr_block->start_ip = ip; - util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) { - const struct tgsi_opcode_info *opcode_info = - tgsi_get_opcode_info(insn->opcode); + util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) { + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode); /* Set up use[] for the srcs. 
* @@ -278,14 +265,9 @@ ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl, struc continue; int index = insn->src[i].Index; - uint32_t used_mask = tgsi_util_get_src_usage_mask(insn->opcode, i, - insn->dst->WriteMask, - insn->src[i].SwizzleX, - insn->src[i].SwizzleY, - insn->src[i].SwizzleZ, - insn->src[i].SwizzleW, - insn->tex_target, - insn->tex_target); + uint32_t used_mask = tgsi_util_get_src_usage_mask( + insn->opcode, i, insn->dst->WriteMask, insn->src[i].SwizzleX, insn->src[i].SwizzleY, + insn->src[i].SwizzleZ, insn->src[i].SwizzleW, insn->tex_target, insn->tex_target); assert(!insn->src[i].Indirect || index < c->first_non_array_temp); ntr_live_reg_mark_use(c, bs, ip, index, used_mask); @@ -331,7 +313,7 @@ ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl) c->liveness = rzalloc_array(c, struct ntr_reg_interval, c->num_temps); struct ntr_live_reg_state state = { - .blocks = rzalloc_array(impl, struct ntr_live_reg_block_state, impl->num_blocks), + .blocks = rzalloc_array(impl, struct ntr_live_reg_block_state, impl->num_blocks), }; /* The intervals start out with start > end (indicating unused) */ @@ -342,7 +324,7 @@ ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl) /* Make a forward-order worklist of all the blocks. 
*/ nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); - nir_foreach_block(block, impl) { + nir_foreach_block (block, impl) { nir_block_worklist_push_tail(&state.worklist, block); } @@ -360,7 +342,8 @@ ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl) continue; for (int i = 0; i < c->num_temps; i++) { - uint8_t new_def = state.blocks[block->index].defout[i] & ~state.blocks[succ->index].defin[i]; + uint8_t new_def = + state.blocks[block->index].defout[i] & ~state.blocks[succ->index].defin[i]; if (new_def) { state.blocks[succ->index].defin[i] |= new_def; @@ -372,7 +355,7 @@ ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl) } /* Make a reverse-order worklist of all the blocks. */ - nir_foreach_block(block, impl) { + nir_foreach_block (block, impl) { nir_block_worklist_push_head(&state.worklist, block); } @@ -408,11 +391,10 @@ ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl) /* Propagate use requests from either our block's uses or our * non-screened-off liveout up to our predecessors. 
*/ - uint8_t new_livein = ((bs->use[i] | (bs->liveout[i] & ~bs->def[i])) & - ~bs->livein[i]); + uint8_t new_livein = ((bs->use[i] | (bs->liveout[i] & ~bs->def[i])) & ~bs->livein[i]); if (new_livein) { bs->livein[i] |= new_livein; - set_foreach(block->predecessors, entry) { + set_foreach (block->predecessors, entry) { nir_block *pred = (void *)entry->key; nir_block_worklist_push_tail(&state.worklist, pred); } @@ -458,15 +440,14 @@ ntr_allocate_regs(struct ntr_compile *c, nir_function_impl *impl) ra_map[i] = ~0; int ip = 0; - nir_foreach_block(block, impl) { + nir_foreach_block (block, impl) { struct ntr_block *ntr_block = ntr_block_from_nir(c, block); for (int i = 0; i < c->num_temps; i++) ntr_ra_check(c, ra_map, released, ip, i); - util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) { - const struct tgsi_opcode_info *opcode_info = - tgsi_get_opcode_info(insn->opcode); + util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) { + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode); for (int i = 0; i < opcode_info->num_src; i++) { if (insn->src[i].File == TGSI_FILE_TEMPORARY) { @@ -511,8 +492,7 @@ ntr_allocate_regs_unoptimized(struct ntr_compile *c, nir_function_impl *impl) static uint32_t ntr_tgsi_var_usage_mask(const struct nir_variable *var) { - const struct glsl_type *type_without_array = - glsl_without_array(var->type); + const struct glsl_type *type_without_array = glsl_without_array(var->type); unsigned num_components = glsl_get_vector_elements(type_without_array); if (num_components == 0) /* structs */ num_components = 4; @@ -530,8 +510,7 @@ ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *fra struct ureg_dst out; if (c->s->info.stage == MESA_SHADER_FRAGMENT) { unsigned semantic_name, semantic_index; - tgsi_get_gl_frag_result_semantic(semantics.location, - &semantic_name, &semantic_index); + tgsi_get_gl_frag_result_semantic(semantics.location, &semantic_name, &semantic_index); 
semantic_index += semantics.dual_source_blend_index; switch (semantics.location) { @@ -549,8 +528,7 @@ ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *fra } else { unsigned semantic_name, semantic_index; - tgsi_get_gl_varying_semantic(semantics.location, true, - &semantic_name, &semantic_index); + tgsi_get_gl_varying_semantic(semantics.location, true, &semantic_name, &semantic_index); uint32_t usage_mask = u_bit_consecutive(*frac, instr->num_components); uint32_t gs_streams = semantics.gs_streams; @@ -567,14 +545,8 @@ ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *fra */ bool invariant = semantics.invariant; - out = ureg_DECL_output_layout(c->ureg, - semantic_name, semantic_index, - gs_streams, - base, - usage_mask, - array_id, - semantics.num_slots, - invariant); + out = ureg_DECL_output_layout(c->ureg, semantic_name, semantic_index, gs_streams, base, + usage_mask, array_id, semantics.num_slots, invariant); } unsigned write_mask; @@ -588,9 +560,7 @@ ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *fra } static bool -ntr_try_store_in_tgsi_output_with_use(struct ntr_compile *c, - struct ureg_dst *dst, - nir_src *src) +ntr_try_store_in_tgsi_output_with_use(struct ntr_compile *c, struct ureg_dst *dst, nir_src *src) { *dst = ureg_dst_undef(); @@ -601,8 +571,7 @@ ntr_try_store_in_tgsi_output_with_use(struct ntr_compile *c, return false; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(nir_src_parent_instr(src)); - if (intr->intrinsic != nir_intrinsic_store_output || - !nir_src_is_const(intr->src[1])) { + if (intr->intrinsic != nir_intrinsic_store_output || !nir_src_is_const(intr->src[1])) { return false; } @@ -627,9 +596,9 @@ ntr_try_store_reg_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst, /* Look for a single use for try_store_in_tgsi_output */ nir_src *use = NULL; - nir_foreach_reg_load(src, reg_decl) { + nir_foreach_reg_load (src, reg_decl) { nir_intrinsic_instr 
*load = nir_instr_as_intrinsic(nir_src_parent_instr(src)); - nir_foreach_use_including_if(load_use, &load->def) { + nir_foreach_use_including_if (load_use, &load->def) { /* We can only have one use */ if (use != NULL) return false; @@ -649,15 +618,14 @@ ntr_try_store_reg_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst, * store_output emit its own MOV. */ static bool -ntr_try_store_ssa_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst, - nir_def *def) +ntr_try_store_ssa_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst, nir_def *def) { *dst = ureg_dst_undef(); if (!list_is_singular(&def->uses)) return false; - nir_foreach_use_including_if(use, def) { + nir_foreach_use_including_if (use, def) { return ntr_try_store_in_tgsi_output_with_use(c, dst, use); } unreachable("We have one use"); @@ -672,45 +640,40 @@ ntr_setup_inputs(struct ntr_compile *c) unsigned num_inputs = 0; int num_input_arrays = 0; - nir_foreach_shader_in_variable(var, c->s) { + nir_foreach_shader_in_variable (var, c->s) { const struct glsl_type *type = var->type; - unsigned array_len = - glsl_count_attribute_slots(type, false); + unsigned array_len = glsl_count_attribute_slots(type, false); num_inputs = MAX2(num_inputs, var->data.driver_location + array_len); } c->input_index_map = ralloc_array(c, struct ureg_src, num_inputs); - nir_foreach_shader_in_variable(var, c->s) { + nir_foreach_shader_in_variable (var, c->s) { const struct glsl_type *type = var->type; - unsigned array_len = - glsl_count_attribute_slots(type, false); + unsigned array_len = glsl_count_attribute_slots(type, false); unsigned interpolation = TGSI_INTERPOLATE_CONSTANT; unsigned sample_loc; struct ureg_src decl; if (c->s->info.stage == MESA_SHADER_FRAGMENT) { - interpolation = - tgsi_get_interp_mode(var->data.interpolation, - var->data.location == VARYING_SLOT_COL0 || - var->data.location == VARYING_SLOT_COL1); + interpolation = tgsi_get_interp_mode( + var->data.interpolation, + var->data.location == 
VARYING_SLOT_COL0 || var->data.location == VARYING_SLOT_COL1); if (var->data.location == VARYING_SLOT_POS) interpolation = TGSI_INTERPOLATE_LINEAR; } unsigned semantic_name, semantic_index; - tgsi_get_gl_varying_semantic(var->data.location, true, - &semantic_name, &semantic_index); + tgsi_get_gl_varying_semantic(var->data.location, true, &semantic_name, &semantic_index); if (var->data.sample) { sample_loc = TGSI_INTERPOLATE_LOC_SAMPLE; } else if (var->data.centroid) { sample_loc = TGSI_INTERPOLATE_LOC_CENTROID; - c->centroid_inputs |= (BITSET_MASK(array_len) << - var->data.driver_location); + c->centroid_inputs |= (BITSET_MASK(array_len) << var->data.driver_location); } else { sample_loc = TGSI_INTERPOLATE_LOC_CENTER; } @@ -721,14 +684,9 @@ ntr_setup_inputs(struct ntr_compile *c) uint32_t usage_mask = ntr_tgsi_var_usage_mask(var); - decl = ureg_DECL_fs_input_centroid_layout(c->ureg, - semantic_name, - semantic_index, - interpolation, - sample_loc, - var->data.driver_location, - usage_mask, - array_id, array_len); + decl = ureg_DECL_fs_input_centroid_layout( + c->ureg, semantic_name, semantic_index, interpolation, sample_loc, + var->data.driver_location, usage_mask, array_id, array_len); if (semantic_name == TGSI_SEMANTIC_FACE) { struct ureg_dst temp = ntr_temp(c); @@ -769,13 +727,12 @@ ntr_setup_outputs(struct ntr_compile *c) nir_sort_variables_with_modes(c->s, ntr_sort_by_location, nir_var_shader_out); - nir_foreach_shader_out_variable(var, c->s) { + nir_foreach_shader_out_variable (var, c->s) { if (var->data.location == FRAG_RESULT_COLOR) ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1); unsigned semantic_name, semantic_index; - tgsi_get_gl_frag_result_semantic(var->data.location, - &semantic_name, &semantic_index); + tgsi_get_gl_frag_result_semantic(var->data.location, &semantic_name, &semantic_index); (void)ureg_DECL_output(c->ureg, semantic_name, semantic_index); } @@ -826,7 +783,7 @@ tgsi_return_type_from_base_type(enum glsl_base_type type) 
case GLSL_TYPE_UINT: return TGSI_RETURN_TYPE_UINT; case GLSL_TYPE_FLOAT: - return TGSI_RETURN_TYPE_FLOAT; + return TGSI_RETURN_TYPE_FLOAT; default: unreachable("unexpected texture type"); } @@ -835,37 +792,37 @@ tgsi_return_type_from_base_type(enum glsl_base_type type) static void ntr_setup_uniforms(struct ntr_compile *c) { - nir_foreach_uniform_variable(var, c->s) { + nir_foreach_uniform_variable (var, c->s) { if (glsl_type_is_sampler(glsl_without_array(var->type)) || glsl_type_is_texture(glsl_without_array(var->type))) { /* Don't use this size for the check for samplers -- arrays of structs * containing samplers should be ignored, and just the separate lowered * sampler uniform decl used. */ - int size = glsl_type_get_sampler_count(var->type) + - glsl_type_get_texture_count(var->type); + int size = glsl_type_get_sampler_count(var->type) + glsl_type_get_texture_count(var->type); const struct glsl_type *stype = glsl_without_array(var->type); - enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(glsl_get_sampler_dim(stype), - glsl_sampler_type_is_array(stype), - glsl_sampler_type_is_shadow(stype)); - enum tgsi_return_type ret_type = tgsi_return_type_from_base_type(glsl_get_sampler_result_type(stype)); + enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim( + glsl_get_sampler_dim(stype), glsl_sampler_type_is_array(stype), + glsl_sampler_type_is_shadow(stype)); + enum tgsi_return_type ret_type = + tgsi_return_type_from_base_type(glsl_get_sampler_result_type(stype)); for (int i = 0; i < size; i++) { - ureg_DECL_sampler_view(c->ureg, var->data.binding + i, - target, ret_type, ret_type, ret_type, ret_type); + ureg_DECL_sampler_view(c->ureg, var->data.binding + i, target, ret_type, ret_type, + ret_type, ret_type); ureg_DECL_sampler(c->ureg, var->data.binding + i); } - /* lower_uniforms_to_ubo lowered non-sampler uniforms to UBOs, so CB0 - * size declaration happens with other UBOs below. 
- */ + /* lower_uniforms_to_ubo lowered non-sampler uniforms to UBOs, so CB0 + * size declaration happens with other UBOs below. + */ } } c->first_ubo = ~0; unsigned ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS] = {0}; - nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ubo) { + nir_foreach_variable_with_modes (var, c->s, nir_var_mem_ubo) { int ubo = var->data.driver_location; if (ubo == -1) continue; @@ -888,7 +845,7 @@ ntr_setup_registers(struct ntr_compile *c) { assert(c->num_temps == 0); - nir_foreach_reg_decl_safe(nir_reg, nir_shader_get_entrypoint(c->s)) { + nir_foreach_reg_decl_safe (nir_reg, nir_shader_get_entrypoint(c->s)) { /* Permanently allocate all the array regs at the start. */ unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg); unsigned index = nir_reg->def.index; @@ -905,7 +862,7 @@ ntr_setup_registers(struct ntr_compile *c) /* After that, allocate non-array regs in our virtual space that we'll * register-allocate before ureg emit. */ - nir_foreach_reg_decl_safe(nir_reg, nir_shader_get_entrypoint(c->s)) { + nir_foreach_reg_decl_safe (nir_reg, nir_shader_get_entrypoint(c->s)) { unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg); unsigned num_components = nir_intrinsic_num_components(nir_reg); unsigned index = nir_reg->def.index; @@ -943,8 +900,7 @@ ntr_reladdr(struct ntr_compile *c, struct ureg_src addr, int addr_index) for (int i = 0; i <= addr_index; i++) { if (!c->addr_declared[i]) { - c->addr_reg[i] = ureg_writemask(ureg_DECL_address(c->ureg), - TGSI_WRITEMASK_X); + c->addr_reg[i] = ureg_writemask(ureg_DECL_address(c->ureg), TGSI_WRITEMASK_X); c->addr_declared[i] = true; } } @@ -954,8 +910,7 @@ ntr_reladdr(struct ntr_compile *c, struct ureg_src addr, int addr_index) } /* Forward declare for recursion with indirects */ -static struct ureg_src -ntr_get_src(struct ntr_compile *c, nir_src src); +static struct ureg_src ntr_get_src(struct ntr_compile *c, nir_src src); static struct ureg_src ntr_get_chased_src(struct 
ntr_compile *c, nir_legacy_src *src) @@ -971,8 +926,7 @@ ntr_get_chased_src(struct ntr_compile *c, nir_legacy_src *src) if (src->reg.indirect) { struct ureg_src offset = ntr_get_src(c, nir_src_for_ssa(src->reg.indirect)); - return ureg_src_indirect(ureg_src(reg_temp), - ntr_reladdr(c, offset, 0)); + return ureg_src_indirect(ureg_src(reg_temp), ntr_reladdr(c, offset, 0)); } else { return ureg_src(reg_temp); } @@ -996,15 +950,10 @@ ntr_get_alu_src(struct ntr_compile *c, nir_alu_instr *instr, int i) * The lower_fabs requests that we not have native source modifiers * for fabs, and instead emit MAX(a,-a) for nir_op_fabs. */ - nir_legacy_alu_src src = - nir_legacy_chase_alu_src(&instr->src[i], !c->lower_fabs); + nir_legacy_alu_src src = nir_legacy_chase_alu_src(&instr->src[i], !c->lower_fabs); struct ureg_src usrc = ntr_get_chased_src(c, &src.src); - usrc = ureg_swizzle(usrc, - src.swizzle[0], - src.swizzle[1], - src.swizzle[2], - src.swizzle[3]); + usrc = ureg_swizzle(usrc, src.swizzle[0], src.swizzle[1], src.swizzle[2], src.swizzle[3]); if (src.fabs) usrc = ureg_abs(usrc); @@ -1022,8 +971,7 @@ ntr_swizzle_for_write_mask(struct ureg_src src, uint32_t write_mask) { assert(write_mask); int first_chan = ffs(write_mask) - 1; - return ureg_swizzle(src, - (write_mask & TGSI_WRITEMASK_X) ? TGSI_SWIZZLE_X : first_chan, + return ureg_swizzle(src, (write_mask & TGSI_WRITEMASK_X) ? TGSI_SWIZZLE_X : first_chan, (write_mask & TGSI_WRITEMASK_Y) ? TGSI_SWIZZLE_Y : first_chan, (write_mask & TGSI_WRITEMASK_Z) ? TGSI_SWIZZLE_Z : first_chan, (write_mask & TGSI_WRITEMASK_W) ? 
TGSI_SWIZZLE_W : first_chan); @@ -1132,9 +1080,7 @@ ntr_store(struct ntr_compile *c, nir_def *def, struct ureg_src src) } static void -ntr_emit_scalar(struct ntr_compile *c, unsigned tgsi_op, - struct ureg_dst dst, - struct ureg_src src0, +ntr_emit_scalar(struct ntr_compile *c, unsigned tgsi_op, struct ureg_dst dst, struct ureg_src src0, struct ureg_src src1) { unsigned i; @@ -1145,11 +1091,8 @@ ntr_emit_scalar(struct ntr_compile *c, unsigned tgsi_op, for (i = 0; i < 4; i++) { if (dst.WriteMask & (1 << i)) { - ntr_insn(c, tgsi_op, - ureg_writemask(dst, 1 << i), - ureg_scalar(src0, i), - ureg_scalar(src1, i), - ureg_src_undef(), ureg_src_undef()); + ntr_insn(c, tgsi_op, ureg_writemask(dst, 1 << i), ureg_scalar(src0, i), + ureg_scalar(src1, i), ureg_src_undef(), ureg_src_undef()); } } } @@ -1203,8 +1146,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr) if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) { /* The normal path for NIR to TGSI ALU op translation */ - ntr_insn(c, op_map[instr->op], - dst, src[0], src[1], src[2], src[3]); + ntr_insn(c, op_map[instr->op], dst, src[0], src[1], src[2], src[3]); } else { /* Special cases for NIR to TGSI ALU op translation. 
*/ @@ -1309,8 +1251,7 @@ ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr) } static struct ureg_src -ntr_ureg_src_indirect(struct ntr_compile *c, struct ureg_src usrc, - nir_src src, int addr_reg) +ntr_ureg_src_indirect(struct ntr_compile *c, struct ureg_src usrc, nir_src src, int addr_reg) { if (nir_src_is_const(src)) { usrc.Index += ntr_src_as_uint(c, src); @@ -1321,8 +1262,7 @@ ntr_ureg_src_indirect(struct ntr_compile *c, struct ureg_src usrc, } static struct ureg_dst -ntr_ureg_dst_indirect(struct ntr_compile *c, struct ureg_dst dst, - nir_src src) +ntr_ureg_dst_indirect(struct ntr_compile *c, struct ureg_dst dst, nir_src src) { if (nir_src_is_const(src)) { dst.Index += ntr_src_as_uint(c, src); @@ -1333,15 +1273,12 @@ ntr_ureg_dst_indirect(struct ntr_compile *c, struct ureg_dst dst, } static struct ureg_dst -ntr_ureg_dst_dimension_indirect(struct ntr_compile *c, struct ureg_dst udst, - nir_src src) +ntr_ureg_dst_dimension_indirect(struct ntr_compile *c, struct ureg_dst udst, nir_src src) { if (nir_src_is_const(src)) { return ureg_dst_dimension(udst, ntr_src_as_uint(c, src)); } else { - return ureg_dst_dimension_indirect(udst, - ntr_reladdr(c, ntr_get_src(c, src), 1), - 0); + return ureg_dst_dimension_indirect(udst, ntr_reladdr(c, ntr_get_src(c, src), 1), 0); } } /* Some load operations in NIR will have a fractional offset that we need to @@ -1350,14 +1287,10 @@ ntr_ureg_dst_dimension_indirect(struct ntr_compile *c, struct ureg_dst udst, static struct ureg_src ntr_shift_by_frac(struct ureg_src src, unsigned frac, unsigned num_components) { - return ureg_swizzle(src, - frac, - frac + MIN2(num_components - 1, 1), - frac + MIN2(num_components - 1, 2), - frac + MIN2(num_components - 1, 3)); + return ureg_swizzle(src, frac, frac + MIN2(num_components - 1, 1), + frac + MIN2(num_components - 1, 2), frac + MIN2(num_components - 1, 3)); } - static void ntr_emit_load_ubo(struct ntr_compile *c, nir_intrinsic_instr *instr) { @@ -1378,9 +1311,7 @@ 
ntr_emit_load_ubo(struct ntr_compile *c, nir_intrinsic_instr *instr) */ addr_temp = ntr_temp(c); ntr_UADD(c, addr_temp, ntr_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo)); - src = ureg_src_dimension_indirect(src, - ntr_reladdr(c, ureg_src(addr_temp), 1), - c->first_ubo); + src = ureg_src_dimension_indirect(src, ntr_reladdr(c, ureg_src(addr_temp), 1), c->first_ubo); } /* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const @@ -1429,8 +1360,7 @@ ntr_emit_load_input(struct ntr_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_interpolated_input: { input = ntr_ureg_src_indirect(c, input, instr->src[1], 0); - nir_intrinsic_instr *bary_instr = - nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); + nir_intrinsic_instr *bary_instr = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); switch (bary_instr->intrinsic) { case nir_intrinsic_load_barycentric_pixel: @@ -1455,14 +1385,12 @@ ntr_emit_load_input(struct ntr_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_barycentric_at_sample: /* We stored the sample in the fake "bary" dest. */ - ntr_INTERP_SAMPLE(c, ntr_get_dest(c, &instr->def), input, - ntr_get_src(c, instr->src[0])); + ntr_INTERP_SAMPLE(c, ntr_get_dest(c, &instr->def), input, ntr_get_src(c, instr->src[0])); break; case nir_intrinsic_load_barycentric_at_offset: /* We stored the offset in the fake "bary" dest. 
*/ - ntr_INTERP_OFFSET(c, ntr_get_dest(c, &instr->def), input, - ntr_get_src(c, instr->src[0])); + ntr_INTERP_OFFSET(c, ntr_get_dest(c, &instr->def), input, ntr_get_src(c, instr->src[0])); break; default: @@ -1499,7 +1427,7 @@ ntr_emit_store_output(struct ntr_compile *c, nir_intrinsic_instr *instr) out = ntr_ureg_dst_indirect(c, out, instr->src[1]); } - uint8_t swizzle[4] = { 0, 0, 0, 0 }; + uint8_t swizzle[4] = {0, 0, 0, 0}; for (int i = frac; i < 4; i++) { if (out.WriteMask & (1 << i)) swizzle[i] = i - frac; @@ -1539,7 +1467,7 @@ ntr_emit_load_output(struct ntr_compile *c, nir_intrinsic_instr *instr) * TESS_LEVEL_INNER. */ int fill_channel = ffs(dst.WriteMask) - 1; - uint8_t swizzles[4] = { 0, 1, 2, 3 }; + uint8_t swizzles[4] = {0, 1, 2, 3}; for (int i = 0; i < 4; i++) if (!(dst.WriteMask & (1 << i))) swizzles[i] = fill_channel; @@ -1671,9 +1599,7 @@ struct ntr_tex_operand_state { }; static void -ntr_push_tex_arg(struct ntr_compile *c, - nir_tex_instr *instr, - nir_tex_src_type tex_src_type, +ntr_push_tex_arg(struct ntr_compile *c, nir_tex_instr *instr, nir_tex_src_type tex_src_type, struct ntr_tex_operand_state *s) { int tex_src = nir_tex_instr_src_index(instr, tex_src_type); @@ -1688,7 +1614,8 @@ static void ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) { struct ureg_dst dst = ntr_get_dest(c, &instr->def); - enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(instr->sampler_dim, instr->is_array, instr->is_shadow); + enum tgsi_texture_type target = + tgsi_texture_type_from_sampler_dim(instr->sampler_dim, instr->is_array, instr->is_shadow); unsigned tex_opcode; int tex_handle_src = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle); @@ -1712,7 +1639,7 @@ ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) switch (instr->op) { case nir_texop_tex: if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) > - MAX2(instr->coord_components, 2) + instr->is_shadow) + 
MAX2(instr->coord_components, 2) + instr->is_shadow) tex_opcode = TGSI_OPCODE_TXP; else tex_opcode = TGSI_OPCODE_TEX; @@ -1745,7 +1672,7 @@ ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) unreachable("unsupported tex op"); } - struct ntr_tex_operand_state s = { .i = 0 }; + struct ntr_tex_operand_state s = {.i = 0}; ntr_push_tex_arg(c, instr, nir_tex_src_backend1, &s); ntr_push_tex_arg(c, instr, nir_tex_src_backend2, &s); @@ -1776,8 +1703,7 @@ ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) } if (instr->op == nir_texop_tg4 && target != TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - if (c->screen->get_param(c->screen, - PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE)) { + if (c->screen->get_param(c->screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE)) { sampler = ureg_scalar(sampler, instr->component); s.srcs[s.i++] = ureg_src_undef(); } else { @@ -1811,7 +1737,8 @@ ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) while (s.i < 4) s.srcs[s.i++] = ureg_src_undef(); - struct ntr_insn *insn = ntr_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]); + struct ntr_insn *insn = + ntr_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]); insn->tex_target = target; insn->tex_return_type = tex_type; insn->is_tex = true; @@ -1830,7 +1757,8 @@ ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr) if (nir_tex_instr_has_explicit_tg4_offsets(instr)) { for (uint8_t i = 0; i < 4; ++i) { - struct ureg_src imm = ureg_imm2i(c->ureg, instr->tg4_offsets[i][0], instr->tg4_offsets[i][1]); + struct ureg_src imm = + ureg_imm2i(c->ureg, instr->tg4_offsets[i][0], instr->tg4_offsets[i][1]); insn->tex_offset[i].File = imm.File; insn->tex_offset[i].Index = imm.Index; insn->tex_offset[i].SwizzleX = imm.SwizzleX; @@ -1943,7 +1871,7 @@ ntr_emit_block(struct ntr_compile *c, nir_block *block) struct ntr_block *ntr_block = ntr_block_from_nir(c, block); c->cur_block = ntr_block; - nir_foreach_instr(instr, block) { + nir_foreach_instr 
(instr, block) { ntr_emit_instr(c, instr); /* Sanity check that we didn't accidentally ureg_OPCODE() instead of ntr_OPCODE(). */ @@ -1970,7 +1898,7 @@ ntr_emit_block(struct ntr_compile *c, nir_block *block) static void ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list) { - foreach_list_typed(nir_cf_node, node, node, list) { + foreach_list_typed (nir_cf_node, node, node, list) { switch (node->type) { case nir_cf_node_block: ntr_emit_block(c, nir_cf_node_as_block(node)); @@ -1996,9 +1924,8 @@ ntr_emit_block_ureg(struct ntr_compile *c, struct nir_block *block) struct ntr_block *ntr_block = ntr_block_from_nir(c, block); /* Emit the ntr insns to tgsi_ureg. */ - util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) { - const struct tgsi_opcode_info *opcode_info = - tgsi_get_opcode_info(insn->opcode); + util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) { + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode); switch (insn->opcode) { case TGSI_OPCODE_IF: @@ -2035,17 +1962,12 @@ ntr_emit_block_ureg(struct ntr_compile *c, struct nir_block *block) if (insn->tex_offset[i].File != TGSI_FILE_NULL) num_offsets = i + 1; } - ureg_tex_insn(c->ureg, insn->opcode, - insn->dst, opcode_info->num_dst, - insn->tex_target, insn->tex_return_type, - insn->tex_offset, - num_offsets, - insn->src, opcode_info->num_src); + ureg_tex_insn(c->ureg, insn->opcode, insn->dst, opcode_info->num_dst, insn->tex_target, + insn->tex_return_type, insn->tex_offset, num_offsets, insn->src, + opcode_info->num_src); } else { - ureg_insn(c->ureg, insn->opcode, - insn->dst, opcode_info->num_dst, - insn->src, opcode_info->num_src, - insn->precise); + ureg_insn(c->ureg, insn->opcode, insn->dst, opcode_info->num_dst, insn->src, + opcode_info->num_src, insn->precise); } } } @@ -2071,7 +1993,7 @@ ntr_emit_if_ureg(struct ntr_compile *c, nir_if *if_stmt) static void ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list) { - 
foreach_list_typed(nir_cf_node, node, node, list) { + foreach_list_typed (nir_cf_node, node, node, list) { switch (node->type) { case nir_cf_node_block: ntr_emit_block_ureg(c, nir_cf_node_as_block(node)); @@ -2105,13 +2027,12 @@ ntr_emit_impl(struct ntr_compile *c, nir_function_impl *impl) /* Set up the struct ntr_blocks to put insns in */ c->blocks = _mesa_pointer_hash_table_create(c); - nir_foreach_block(block, impl) { + nir_foreach_block (block, impl) { struct ntr_block *ntr_block = rzalloc(c->blocks, struct ntr_block); util_dynarray_init(&ntr_block->insns, ntr_block); _mesa_hash_table_insert(c->blocks, block, ntr_block); } - ntr_setup_registers(c); c->cur_block = ntr_block_from_nir(c, nir_start_block(impl)); @@ -2132,7 +2053,6 @@ ntr_emit_impl(struct ntr_compile *c, nir_function_impl *impl) ralloc_free(c->liveness); c->liveness = NULL; - } static int @@ -2153,9 +2073,8 @@ ntr_should_vectorize_instr(const nir_instr *instr, const void *data) } static bool -ntr_should_vectorize_io(unsigned align, unsigned bit_size, - unsigned num_components, unsigned high_offset, - nir_intrinsic_instr *low, nir_intrinsic_instr *high, +ntr_should_vectorize_io(unsigned align, unsigned bit_size, unsigned num_components, + unsigned high_offset, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { if (bit_size != 32) @@ -2182,18 +2101,15 @@ ntr_no_indirects_mask(nir_shader *s, struct pipe_screen *screen) unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage); unsigned indirect_mask = 0; - if (!screen->get_shader_param(screen, pipe_stage, - PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR)) { + if (!screen->get_shader_param(screen, pipe_stage, PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR)) { indirect_mask |= nir_var_shader_in; } - if (!screen->get_shader_param(screen, pipe_stage, - PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR)) { + if (!screen->get_shader_param(screen, pipe_stage, PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR)) { indirect_mask |= nir_var_shader_out; } - if 
(!screen->get_shader_param(screen, pipe_stage, - PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR)) { + if (!screen->get_shader_param(screen, pipe_stage, PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR)) { indirect_mask |= nir_var_function_temp; } @@ -2206,10 +2122,8 @@ struct ntr_lower_tex_state { }; static void -nir_to_rc_lower_tex_instr_arg(nir_builder *b, - nir_tex_instr *instr, - nir_tex_src_type tex_src_type, - struct ntr_lower_tex_state *s) +nir_to_rc_lower_tex_instr_arg(nir_builder *b, nir_tex_instr *instr, nir_tex_src_type tex_src_type, + struct ntr_lower_tex_state *s) { int tex_src = nir_tex_instr_src_index(instr, tex_src_type); if (tex_src < 0) @@ -2271,11 +2185,9 @@ nir_to_rc_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data) s.channels[i] = s.channels[0]; } - nir_tex_instr_add_src(tex, nir_tex_src_backend1, - nir_vec_scalars(b, s.channels, MIN2(s.i, 4))); + nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_vec_scalars(b, s.channels, MIN2(s.i, 4))); if (s.i > 4) - nir_tex_instr_add_src(tex, nir_tex_src_backend2, - nir_vec_scalars(b, &s.channels[4], s.i - 4)); + nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_vec_scalars(b, &s.channels[4], s.i - 4)); return true; } @@ -2283,9 +2195,7 @@ nir_to_rc_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data) static bool nir_to_rc_lower_tex(nir_shader *s) { - return nir_shader_instructions_pass(s, - nir_to_rc_lower_tex_instr, - nir_metadata_control_flow, + return nir_shader_instructions_pass(s, nir_to_rc_lower_tex_instr, nir_metadata_control_flow, NULL); } @@ -2294,11 +2204,11 @@ static void nir_to_rc_lower_txp(nir_shader *s) { nir_lower_tex_options lower_tex_options = { - .lower_txp = 0, + .lower_txp = 0, }; - nir_foreach_block(block, nir_shader_get_entrypoint(s)) { - nir_foreach_instr(instr, block) { + nir_foreach_block (block, nir_shader_get_entrypoint(s)) { + nir_foreach_instr (instr, block) { if (instr->type != nir_instr_type_tex) continue; nir_tex_instr *tex = nir_instr_as_tex(instr); @@ -2307,7 +2217,8 @@ 
nir_to_rc_lower_txp(nir_shader *s) continue; bool has_compare = nir_tex_instr_src_index(tex, nir_tex_src_comparator) >= 0; - bool has_lod = nir_tex_instr_src_index(tex, nir_tex_src_lod) >= 0 || s->info.stage != MESA_SHADER_FRAGMENT; + bool has_lod = nir_tex_instr_src_index(tex, nir_tex_src_lod) >= 0 || + s->info.stage != MESA_SHADER_FRAGMENT; bool has_offset = nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; /* We can do TXP for any tex (not txg) where we can fit all the @@ -2317,7 +2228,8 @@ nir_to_rc_lower_txp(nir_shader *s) * nir_lower_tex() only handles the lowering on a sampler-dim basis, so * if we get any funny projectors then we just blow them all away. */ - if (tex->op != nir_texop_tex || has_lod || has_offset || (tex->coord_components >= 3 && has_compare)) + if (tex->op != nir_texop_tex || has_lod || has_offset || + (tex->coord_components >= 3 && has_compare)) lower_tex_options.lower_txp |= 1 << tex->sampler_dim; } } @@ -2335,8 +2247,8 @@ nir_to_rc_lower_txp(nir_shader *s) * We take ownership of the NIR shader passed, returning a reference to the new * TGSI tokens instead. If you need to keep the NIR, then pass us a clone. 
*/ -const void *nir_to_rc(struct nir_shader *s, - struct pipe_screen *screen) +const void * +nir_to_rc(struct nir_shader *s, struct pipe_screen *screen) { struct ntr_compile *c; const void *tgsi_tokens; @@ -2359,8 +2271,8 @@ const void *nir_to_rc(struct nir_shader *s, NIR_PASS_V(s, nir_remove_dead_variables, nir_var_shader_in, NULL); } - NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, - type_size, (nir_lower_io_options)0); + NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size, + (nir_lower_io_options)0); nir_to_rc_lower_txp(s); NIR_PASS_V(s, nir_to_rc_lower_tex); @@ -2386,8 +2298,7 @@ const void *nir_to_rc(struct nir_shader *s, NIR_PASS_V(s, nir_lower_int_to_float); NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, r300_nir_post_integer_lowering); - NIR_PASS_V(s, nir_lower_bool_to_float, - is_r500 || s->info.stage == MESA_SHADER_FRAGMENT); + NIR_PASS_V(s, nir_lower_bool_to_float, is_r500 || s->info.stage == MESA_SHADER_FRAGMENT); /* bool_to_float generates MOVs for b2f32 that we want to clean up. */ NIR_PASS_V(s, nir_copy_prop); /* CSE cleanup after late ftrunc lowering. */ @@ -2408,9 +2319,8 @@ const void *nir_to_rc(struct nir_shader *s, NIR_PASS_V(s, r300_nir_opt_algebraic_late); NIR_PASS_V(s, nir_opt_dce); - nir_move_options move_all = - nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | - nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; + nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | + nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; NIR_PASS_V(s, nir_opt_move, move_all); NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, true); @@ -2451,14 +2361,12 @@ const void *nir_to_rc(struct nir_shader *s, if ((s->info.inputs_read & VARYING_BIT_POS) || BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) { ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, - s->info.fs.origin_upper_left ? 
- TGSI_FS_COORD_ORIGIN_UPPER_LEFT : - TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + s->info.fs.origin_upper_left ? TGSI_FS_COORD_ORIGIN_UPPER_LEFT + : TGSI_FS_COORD_ORIGIN_LOWER_LEFT); ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, - s->info.fs.pixel_center_integer ? - TGSI_FS_COORD_PIXEL_CENTER_INTEGER : - TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); + s->info.fs.pixel_center_integer ? TGSI_FS_COORD_PIXEL_CENTER_INTEGER + : TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); } } /* Emit the main function */ diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.h b/src/gallium/drivers/r300/compiler/nir_to_rc.h index 26a94ec96fa..2b82bc3494d 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.h +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.h @@ -12,7 +12,6 @@ struct nir_shader; struct pipe_screen; -const void *nir_to_rc(struct nir_shader *s, - struct pipe_screen *screen); +const void *nir_to_rc(struct nir_shader *s, struct pipe_screen *screen); #endif /* NIR_TO_RC_H */ diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c index 53343fae07f..fc4d68daa5b 100644 --- a/src/gallium/drivers/r300/compiler/r300_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c @@ -9,308 +9,257 @@ #include "r300_reg.h" -static void presub_string(char out[10], unsigned int inst) +static void +presub_string(char out[10], unsigned int inst) { - switch(inst & 0x600000){ - case R300_ALU_SRCP_1_MINUS_2_SRC0: - sprintf(out, "bias"); - break; - case R300_ALU_SRCP_SRC1_MINUS_SRC0: - sprintf(out, "sub"); - break; - case R300_ALU_SRCP_SRC1_PLUS_SRC0: - sprintf(out, "add"); - break; - case R300_ALU_SRCP_1_MINUS_SRC0: - sprintf(out, "inv "); - break; - } + switch (inst & 0x600000) { + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + 
sprintf(out, "inv "); + break; + } } -static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +static int +get_msb(unsigned int bit, unsigned int r400_ext_addr) { - return (r400_ext_addr & bit) ? 1 << 5 : 0; + return (r400_ext_addr & bit) ? 1 << 5 : 0; } /* just some random things... */ -void r300FragmentProgramDump(struct radeon_compiler *c, void *user) +void +r300FragmentProgramDump(struct radeon_compiler *c, void *user) { - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r300_fragment_program_code *code = &compiler->code->code.r300; - int n, i, j; - static int pc = 0; + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + int n, i, j; + static int pc = 0; - fprintf(stderr, "pc=%d*************************************\n", pc++); + fprintf(stderr, "pc=%d*************************************\n", pc++); - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - if (c->is_r400) { - fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); - } + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } - for (n = 0; n <= (code->config & 3); n++) { - uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + - (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); - unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + - (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); - int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; - int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = 
code->code_addr[3 - (code->config & 3) + n]; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " - "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, - alu_offset, tex_offset, alu_end, tex_end, code_addr); + fprintf(stderr, + "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", + n, alu_offset, tex_offset, alu_end, tex_end, code_addr); - if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { - fprintf(stderr, " TEX:\n"); - for (i = tex_offset; - i <= tex_offset + tex_end; - ++i) { - const char *instr; + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; i <= tex_offset + tex_end; ++i) { + const char *instr; - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } + switch ((code->tex.inst[i] >> R300_TEX_INST_SHIFT) & 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. 
- inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } + fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr, + (code->tex.inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', + (code->tex.inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex.inst[i] & R300_TEX_ID_MASK) >> R300_TEX_ID_SHIFT, code->tex.inst[i]); + } + } - for (i = alu_offset; - i <= alu_offset + alu_end; ++i) { - char srcc[4][10], dstc[20]; - char srca[4][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; + for (i = alu_offset; i <= alu_offset + alu_end; ++i) { + char srcc[4][10], dstc[20]; + char srca[4][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].rgb_addr >> (j * 6); - int rega = code->alu.inst[i].alpha_addr >> (j * 6); - int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), - code->alu.inst[i].r400_ext_addr); - int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), - code->alu.inst[i].r400_ext_addr); + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), code->alu.inst[i].r400_ext_addr); - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', (regc & 31) | msbc); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', (rega & 31) | msba); - } + sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', (regc & 31) | msbc); + sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', (rega & 31) | msba); + } - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_Z) ? 
"z" : ""); - if (flags[0] != 0) { - unsigned int msb = get_msb( - R400_ADDRD_EXT_RGB_MSB_BIT, - code->alu.inst[i].r400_ext_addr); + dstc[0] = 0; + sprintf(flags, "%s%s%s", (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + unsigned int msb = get_msb(R400_ADDRD_EXT_RGB_MSB_BIT, code->alu.inst[i].r400_ext_addr); - sprintf(dstc, "t%i.%s ", - ((code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) - & 31) | msb, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - rgb_addr >> 29) & 3, - flags); - strcat(dstc, tmp); - } - /* Presub */ - presub_string(srcc[3], code->alu.inst[i].rgb_inst); - presub_string(srca[3], code->alu.inst[i].alpha_inst); + sprintf(dstc, "t%i.%s ", + ((code->alu.inst[i].rgb_addr >> R300_ALU_DSTC_SHIFT) & 31) | msb, flags); + } + sprintf(flags, "%s%s%s", (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", (code->alu.inst[i].rgb_addr >> 29) & 3, flags); + strcat(dstc, tmp); + } + /* Presub */ + presub_string(srcc[3], code->alu.inst[i].rgb_inst); + presub_string(srca[3], code->alu.inst[i].alpha_inst); - dsta[0] = 0; - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { - unsigned int msb = get_msb( - R400_ADDRD_EXT_A_MSB_BIT, - code->alu.inst[i].r400_ext_addr); - sprintf(dsta, "t%i.w ", - ((code->alu.inst[i]. 
- alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) - | msb); - } - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - alpha_addr >> 25) & 3); - strcat(dsta, tmp); - } - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb(R400_ADDRD_EXT_A_MSB_BIT, code->alu.inst[i].r400_ext_addr); + sprintf(dsta, "t%i.w ", + ((code->alu.inst[i].alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) | msb); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", (code->alu.inst[i].alpha_addr >> 25) & 3); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } - fprintf(stderr, - "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" - " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], srcc[3], dstc, - code->alu.inst[i].rgb_addr, srca[0], srca[1], - srca[2], srca[3], dsta, - code->alu.inst[i].alpha_addr); + fprintf(stderr, + "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" + " w: %3s %3s %3s %5s-> %-20s (%08x)\n", + i, srcc[0], srcc[1], srcc[2], srcc[3], dstc, code->alu.inst[i].rgb_addr, srca[0], + srca[1], srca[2], srca[3], dsta, code->alu.inst[i].alpha_addr); - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].rgb_inst >> (j * 7); - int rega = code->alu.inst[i].alpha_inst >> (j * 7); - int d; - char buf[20]; + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - 
srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d < 20 ) { - switch(d) { - case R300_ALU_ARGC_SRCP_XYZ: - sprintf(buf, "srcp.xyz"); - break; - case R300_ALU_ARGC_SRCP_XXX: - sprintf(buf, "srcp.xxx"); - break; - case R300_ALU_ARGC_SRCP_YYY: - sprintf(buf, "srcp.yyy"); - break; - case R300_ALU_ARGC_SRCP_ZZZ: - sprintf(buf, "srcp.zzz"); - break; - case R300_ALU_ARGC_SRCP_WWW: - sprintf(buf, "srcp.www"); - break; - } - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d < 20) { + switch (d) { + case R300_ALU_ARGC_SRCP_XYZ: + sprintf(buf, "srcp.xyz"); + break; + case R300_ALU_ARGC_SRCP_XXX: + sprintf(buf, "srcp.xxx"); + break; + case R300_ALU_ARGC_SRCP_YYY: + sprintf(buf, "srcp.yyy"); + break; + case R300_ALU_ARGC_SRCP_ZZZ: + sprintf(buf, "srcp.zzz"); + break; + case R300_ALU_ARGC_SRCP_WWW: + sprintf(buf, "srcp.www"); + break; + } + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", srcc[d % 3]); + break; + case 1: + 
sprintf(buf, "%s.zxy", srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); + sprintf(argc[j], "%s%s%s%s", (regc & 32) ? "-" : "", (regc & 64) ? "|" : "", buf, + (regc & 64) ? "|" : ""); - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d < 16) { - switch(d) { - case R300_ALU_ARGA_SRCP_X: - sprintf(buf, "srcp.x"); - break; - case R300_ALU_ARGA_SRCP_Y: - sprintf(buf, "srcp.y"); - break; - case R300_ALU_ARGA_SRCP_Z: - sprintf(buf, "srcp.z"); - break; - case R300_ALU_ARGA_SRCP_W: - sprintf(buf, "srcp.w"); - break; - } - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d < 16) { + switch (d) { + case R300_ALU_ARGA_SRCP_X: + sprintf(buf, "srcp.x"); + break; + case R300_ALU_ARGA_SRCP_Y: + sprintf(buf, "srcp.y"); + break; + case R300_ALU_ARGA_SRCP_Z: + sprintf(buf, "srcp.z"); + break; + case R300_ALU_ARGA_SRCP_W: + sprintf(buf, "srcp.w"); + break; + } + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } + sprintf(arga[j], "%s%s%s%s", (rega & 32) ? "-" : "", (rega & 64) ? "|" : "", buf, + (rega & 64) ? 
"|" : ""); + } - fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].rgb_inst, - code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? - "NOP" : "", - arga[0], arga[1],arga[2], - code->alu.inst[i].alpha_inst); - } - } + fprintf(stderr, + " xyz: %8s %8s %8s op: %08x %s\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], code->alu.inst[i].rgb_inst, + code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? "NOP" : "", arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } } diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h index 02244a48fb9..df3ad9cb77c 100644 --- a/src/gallium/drivers/r300/compiler/r300_fragprog.h +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h @@ -12,7 +12,6 @@ #include "radeon_compiler.h" #include "radeon_program.h" - extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c index afbde837b3f..eaf40643efe 100644 --- a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c @@ -18,41 +18,42 @@ #include "r300_reg.h" -#include "radeon_program_pair.h" #include "r300_fragprog_swizzle.h" +#include "radeon_program_pair.h" #include "util/compiler.h" - struct r300_emit_state { - struct r300_fragment_program_compiler * compiler; + struct r300_fragment_program_compiler *compiler; - unsigned current_node : 2; - unsigned node_first_tex : 8; - unsigned node_first_alu : 8; - uint32_t node_flags; + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; }; -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = emit->compiler; \ - struct 
r300_fragment_program_code *code = &c->code->code.r300 +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 -#define error(fmt, args...) do { \ - rc_error(&c->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __func__, ##args); \ - } while(0) +#define error(fmt, args...) \ + do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \ + } while (0) -static unsigned int get_msbs_alu(unsigned int bits) +static unsigned int +get_msbs_alu(unsigned int bits) { - return (bits >> 6) & 0x7; + return (bits >> 6) & 0x7; } /** * @param lsbs The number of least significant bits */ -static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +static unsigned int +get_msbs_tex(unsigned int bits, unsigned int lsbs) { - return (bits >> lsbs) & 0x15; + return (bits >> lsbs) & 0x15; } #define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) @@ -60,477 +61,437 @@ static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) /** * Mark a temporary register as used. 
*/ -static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +static void +use_temporary(struct r300_fragment_program_code *code, unsigned int index) { - if (index > code->pixsize) - code->pixsize = index; + if (index > code->pixsize) + code->pixsize = index; } -static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) +static unsigned int +use_source(struct r300_fragment_program_code *code, struct rc_pair_instruction_source src) { - if (!src.Used) - return 0; + if (!src.Used) + return 0; - if (src.File == RC_FILE_CONSTANT) { - return src.Index | (1 << 5); - } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { - use_temporary(code, src.Index); - return src.Index & 0x1f; - } + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index & 0x1f; + } - return 0; + return 0; } - -static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +static unsigned int +translate_rgb_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode) { - switch(opcode) { - case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; - case RC_OPCODE_CND: return R300_ALU_OUTC_CND; - case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; - case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; - case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; - default: - error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); - FALLTHROUGH; - case RC_OPCODE_NOP: - FALLTHROUGH; - case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; - case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; - case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; - case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; - } + switch (opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; + case RC_OPCODE_DP3: return 
R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + FALLTHROUGH; + case RC_OPCODE_NOP: FALLTHROUGH; + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } } -static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +static unsigned int +translate_alpha_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode) { - switch(opcode) { - case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; - case RC_OPCODE_CND: return R300_ALU_OUTA_CND; - case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; - case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; - case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; - case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; - case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; - default: - error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); - FALLTHROUGH; - case RC_OPCODE_NOP: - FALLTHROUGH; - case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; - case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; - case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; - case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; - case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; - } + switch (opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + FALLTHROUGH; + case RC_OPCODE_NOP: FALLTHROUGH; + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case 
RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } } /** * Emit one paired ALU instruction. */ -static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +static int +emit_alu(struct r300_emit_state *emit, struct rc_pair_instruction *inst) { - int ip; - int j; - PROG_CODE; + int ip; + int j; + PROG_CODE; - if (code->alu.length >= c->Base.max_alu_insts) { - /* rc_recompute_ips does not give an exact count, because it counts extra stuff - * like BEGINTEX, but here it is intended to be only approximative anyway, - * just to give some idea how close to the limit we are. */ - rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n", - rc_recompute_ips(&c->Base), c->Base.max_alu_insts); - return 0; - } + if (code->alu.length >= c->Base.max_alu_insts) { + /* rc_recompute_ips does not give an exact count, because it counts extra stuff + * like BEGINTEX, but here it is intended to be only approximative anyway, + * just to give some idea how close to the limit we are. 
*/ + rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n", + rc_recompute_ips(&c->Base), c->Base.max_alu_insts); + return 0; + } - ip = code->alu.length++; + ip = code->alu.length++; - code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); - code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); - for(j = 0; j < 3; ++j) { - /* Set the RGB address */ - unsigned int src = use_source(code, inst->RGB.Src[j]); - unsigned int arg; - if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + for (j = 0; j < 3; ++j) { + /* Set the RGB address */ + unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); - code->alu.inst[ip].rgb_addr |= src << (6*j); + code->alu.inst[ip].rgb_addr |= src << (6 * j); - /* Set the Alpha address */ - src = use_source(code, inst->Alpha.Src[j]); - if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + /* Set the Alpha address */ + src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); - code->alu.inst[ip].alpha_addr |= src << (6*j); + code->alu.inst[ip].alpha_addr |= src << (6 * j); - arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); - arg |= inst->RGB.Arg[j].Abs << 6; - arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].rgb_inst |= arg << (7*j); + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + 
code->alu.inst[ip].rgb_inst |= arg << (7 * j); - arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); - arg |= inst->Alpha.Arg[j].Abs << 6; - arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].alpha_inst |= arg << (7*j); - } + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7 * j); + } - /* Presubtract */ - if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_1_MINUS_2_SRC0; - break; - case RC_PRESUB_ADD: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_SRC1_PLUS_SRC0; - break; - case RC_PRESUB_SUB: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_SRC1_MINUS_SRC0; - break; - case RC_PRESUB_INV: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_1_MINUS_SRC0; - break; - default: - break; - } - } + /* Presubtract */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } - if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_1_MINUS_2_SRC0; - break; - case RC_PRESUB_ADD: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_SRC1_PLUS_SRC0; - break; - case RC_PRESUB_SUB: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_SRC1_MINUS_SRC0; - break; - case RC_PRESUB_INV: - 
code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_1_MINUS_SRC0; - break; - default: - break; - } - } + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } - if (inst->RGB.Saturate) - code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; - if (inst->Alpha.Saturate) - code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; - if (inst->RGB.WriteMask) { - use_temporary(code, inst->RGB.DestIndex); - if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; - code->alu.inst[ip].rgb_addr |= - ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | - (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); - } - if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].rgb_addr |= - (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | - R300_RGB_TARGET(inst->RGB.Target); - emit->node_flags |= R300_RGBA_OUT; - } + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; + code->alu.inst[ip].rgb_addr |= ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + 
R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } - if (inst->Alpha.WriteMask) { - use_temporary(code, inst->Alpha.DestIndex); - if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; - code->alu.inst[ip].alpha_addr |= - ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_REG; - } - if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | - R300_ALPHA_TARGET(inst->Alpha.Target); - emit->node_flags |= R300_RGBA_OUT; - } - if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; - emit->node_flags |= R300_W_OUT; - c->code->writes_depth = 1; - } - if (inst->Nop) - code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; + code->alu.inst[ip].alpha_addr |= + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; - /* Handle Output Modifier - * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */ - if (inst->RGB.Omod) { - if (inst->RGB.Omod == RC_OMOD_DISABLE) { - rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); - } - code->alu.inst[ip].rgb_inst |= - (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT); - } - if (inst->Alpha.Omod) { - if (inst->Alpha.Omod == RC_OMOD_DISABLE) { - rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); - } - code->alu.inst[ip].alpha_inst |= - 
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT); - } - return 1; + /* Handle Output Modifier + * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */ + if (inst->RGB.Omod) { + if (inst->RGB.Omod == RC_OMOD_DISABLE) { + rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); + } + code->alu.inst[ip].rgb_inst |= (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT); + } + if (inst->Alpha.Omod) { + if (inst->Alpha.Omod == RC_OMOD_DISABLE) { + rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); + } + code->alu.inst[ip].alpha_inst |= (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT); + } + return 1; } - /** * Finish the current node without advancing to the next one. */ -static int finish_node(struct r300_emit_state * emit) +static int +finish_node(struct r300_emit_state *emit) { - struct r300_fragment_program_compiler * c = emit->compiler; - struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - unsigned alu_offset; - unsigned alu_end; - unsigned tex_offset; - unsigned tex_end; + struct r300_fragment_program_compiler *c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; - unsigned int alu_offset_msbs, alu_end_msbs; + unsigned int alu_offset_msbs, alu_end_msbs; - if (code->alu.length == emit->node_first_alu) { - /* Generate a single NOP for this node */ - struct rc_pair_instruction inst; - memset(&inst, 0, sizeof(inst)); - if (!emit_alu(emit, &inst)) - return 0; - } + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } - alu_offset = emit->node_first_alu; - alu_end = code->alu.length - alu_offset - 1; - tex_offset = emit->node_first_tex; - tex_end = code->tex.length - tex_offset - 1; + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; + 
tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; - if (code->tex.length == emit->node_first_tex) { - if (emit->current_node > 0) { - error("Node %i has no TEX instructions", emit->current_node); - return 0; - } + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } - tex_end = 0; - } else { - if (emit->current_node == 0) - code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; - } + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } - /* Write the config register. - * Note: The order in which the words for each node are written - * is not correct here and needs to be fixed up once we're entirely - * done - * - * Also note that the register specification from AMD is slightly - * incorrect in its description of this register. */ - code->code_addr[emit->current_node] = - ((alu_offset << R300_ALU_START_SHIFT) - & R300_ALU_START_MASK) - | ((alu_end << R300_ALU_SIZE_SHIFT) - & R300_ALU_SIZE_MASK) - | ((tex_offset << R300_TEX_START_SHIFT) - & R300_TEX_START_MASK) - | ((tex_end << R300_TEX_SIZE_SHIFT) - & R300_TEX_SIZE_MASK) - | emit->node_flags - | (get_msbs_tex(tex_offset, 5) - << R400_TEX_START_MSB_SHIFT) - | (get_msbs_tex(tex_end, 5) - << R400_TEX_SIZE_MSB_SHIFT) - ; + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. 
*/ + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) & R300_ALU_START_MASK) | + ((alu_end << R300_ALU_SIZE_SHIFT) & R300_ALU_SIZE_MASK) | + ((tex_offset << R300_TEX_START_SHIFT) & R300_TEX_START_MASK) | + ((tex_end << R300_TEX_SIZE_SHIFT) & R300_TEX_SIZE_MASK) | emit->node_flags | + (get_msbs_tex(tex_offset, 5) << R400_TEX_START_MSB_SHIFT) | + (get_msbs_tex(tex_end, 5) << R400_TEX_SIZE_MSB_SHIFT); - /* Write r400 extended instruction fields. These will be ignored on - * r300 cards. */ - alu_offset_msbs = get_msbs_alu(alu_offset); - alu_end_msbs = get_msbs_alu(alu_end); - switch(emit->current_node) { - case 0: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START3_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; - break; - case 1: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START2_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; - break; - case 2: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START1_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; - break; - case 3: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START0_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; - break; - } - return 1; + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. 
*/ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch (emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } + return 1; } - /** * Begin a block of texture instructions. * Create the necessary indirection. */ -static int begin_tex(struct r300_emit_state * emit) +static int +begin_tex(struct r300_emit_state *emit) { - PROG_CODE; + PROG_CODE; - if (code->alu.length == emit->node_first_alu && - code->tex.length == emit->node_first_tex) { - return 1; - } + if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) { + return 1; + } - if (emit->current_node == 3) { - error("Too many texture indirections"); - return 0; - } + if (emit->current_node == 3) { + error("Too many texture indirections"); + return 0; + } - if (!finish_node(emit)) - return 0; + if (!finish_node(emit)) + return 0; - emit->current_node++; - emit->node_first_tex = code->tex.length; - emit->node_first_alu = code->alu.length; - emit->node_flags = 0; - return 1; + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; } - -static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +static int +emit_tex(struct r300_emit_state *emit, struct rc_instruction *inst) { - unsigned int unit; - unsigned int dest; - unsigned int opcode; - PROG_CODE; + unsigned int unit; + unsigned int dest; + unsigned 
int opcode; + PROG_CODE; - if (code->tex.length >= emit->compiler->Base.max_tex_insts) { - error("Too many TEX instructions"); - return 0; - } + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { + error("Too many TEX instructions"); + return 0; + } - unit = inst->U.I.TexSrcUnit; - dest = inst->U.I.DstReg.Index; + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; - switch(inst->U.I.Opcode) { - case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; - default: - error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); - return 0; - } + switch (inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } - if (inst->U.I.Opcode == RC_OPCODE_KIL) { - unit = 0; - dest = 0; - } else { - use_temporary(code, dest); - } + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } - use_temporary(code, inst->U.I.SrcReg[0].Index); + use_temporary(code, inst->U.I.SrcReg[0].Index); - code->tex.inst[code->tex.length++] = - ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) - & R300_SRC_ADDR_MASK) - | ((dest << R300_DST_ADDR_SHIFT) - & R300_DST_ADDR_MASK) - | (unit << R300_TEX_ID_SHIFT) - | (opcode << R300_TEX_INST_SHIFT) - | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? - R400_SRC_ADDR_EXT_BIT : 0) - | (dest >= R300_PFS_NUM_TEMP_REGS ? 
- R400_DST_ADDR_EXT_BIT : 0) - ; - return 1; + code->tex.inst[code->tex.length++] = + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) & R300_SRC_ADDR_MASK) | + ((dest << R300_DST_ADDR_SHIFT) & R300_DST_ADDR_MASK) | (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT) | + (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? R400_SRC_ADDR_EXT_BIT : 0) | + (dest >= R300_PFS_NUM_TEMP_REGS ? R400_DST_ADDR_EXT_BIT : 0); + return 1; } - /** * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. */ -void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +void +r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) { - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r300_emit_state emit; - struct r300_fragment_program_code *code = &compiler->code->code.r300; - unsigned int tex_end; + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; - memset(&emit, 0, sizeof(emit)); - emit.compiler = compiler; + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; - memset(code, 0, sizeof(struct r300_fragment_program_code)); + memset(code, 0, sizeof(struct r300_fragment_program_code)); - for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; - inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; - inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { - begin_tex(&emit); - continue; - } + for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); 
+ continue; + } - emit_tex(&emit, inst); - } else { - emit_alu(&emit, &inst->U.P); - } - } + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } - if (code->pixsize >= compiler->Base.max_temp_regs) - rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); - if (compiler->Base.Error) - return; + if (compiler->Base.Error) + return; - /* Finish the program */ - finish_node(&emit); + /* Finish the program */ + finish_node(&emit); - code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ - /* Set r400 extended instruction fields. These values will be ignored - * on r300 cards. */ - code->r400_code_offset_ext |= - (get_msbs_alu(0) - << R400_ALU_OFFSET_MSB_SHIFT) - | (get_msbs_alu(code->alu.length - 1) - << R400_ALU_SIZE_MSB_SHIFT); + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= (get_msbs_alu(0) << R400_ALU_OFFSET_MSB_SHIFT) | + (get_msbs_alu(code->alu.length - 1) << R400_ALU_SIZE_MSB_SHIFT); - tex_end = code->tex.length ? code->tex.length - 1 : 0; - code->code_offset = - ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) - & R300_PFS_CNTL_ALU_OFFSET_MASK) - | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) - & R300_PFS_CNTL_ALU_END_MASK) - | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) - & R300_PFS_CNTL_TEX_OFFSET_MASK) - | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) - & R300_PFS_CNTL_TEX_END_MASK) - | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) - | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) - ; + tex_end = code->tex.length ? 
code->tex.length - 1 : 0; + code->code_offset = + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) & R300_PFS_CNTL_ALU_OFFSET_MASK) | + (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) & R300_PFS_CNTL_ALU_END_MASK) | + ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) & R300_PFS_CNTL_TEX_OFFSET_MASK) | + ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) & R300_PFS_CNTL_TEX_END_MASK) | + (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) | + (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT); - if (emit.current_node < 3) { - int shift = 3 - emit.current_node; - int i; - for(i = emit.current_node; i >= 0; --i) - code->code_addr[shift + i] = code->code_addr[i]; - for(i = 0; i < shift; ++i) - code->code_addr[i] = 0; - } + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for (i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for (i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } - if (code->pixsize >= R300_PFS_NUM_TEMP_REGS - || code->alu.length > R300_PFS_MAX_ALU_INST - || code->tex.length > R300_PFS_MAX_TEX_INST) { + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS || code->alu.length > R300_PFS_MAX_ALU_INST || + code->tex.length > R300_PFS_MAX_TEX_INST) { - code->r390_mode = 1; - } + code->r390_mode = 1; + } } diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c index 2549b21b480..0676e9721c5 100644 --- a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -17,28 +17,28 @@ #include "r300_reg.h" #include "radeon_compiler.h" -#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) +#define MAKE_SWZ3(x, y, z) \ + (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) struct swizzle_data { - unsigned int hash; /**< swizzle value this matches */ - unsigned int base; /**< base value for hw swizzle */ 
- unsigned int stride; /**< difference in base between arg0/1/2 */ - unsigned int srcp_stride; /**< difference in base between arg0/scrp */ + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/scrp */ }; static const struct swizzle_data native_swizzles[] = { - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} -}; + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}}; static const int num_native_swizzles = ARRAY_SIZE(native_swizzles); /* Only swizzles with srcp_stride != 0 can be used for presub, so @@ -49,24 +49,25 @@ static const int num_presub_swizzles = 5; * Find a native RGB swizzle that matches the given swizzle. * Returns 0 if none found. 
*/ -static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +static const struct swizzle_data * +lookup_native_swizzle(unsigned int swizzle) { - int i, comp; + int i, comp; - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data* sd = &native_swizzles[i]; - for(comp = 0; comp < 3; ++comp) { - unsigned int swz = GET_SWZ(swizzle, comp); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz != GET_SWZ(sd->hash, comp)) - break; - } - if (comp == 3) - return sd; - } + for (i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + for (comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } - return NULL; + return NULL; } /** @@ -74,154 +75,156 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) * it is better to use r300_swizzle_is_native() which can be accessed via * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). */ -int r300_swizzle_is_native_basic(unsigned int swizzle) +int +r300_swizzle_is_native_basic(unsigned int swizzle) { - if(lookup_native_swizzle(swizzle)) - return 1; - else - return 0; + if (lookup_native_swizzle(swizzle)) + return 1; + else + return 0; } /** * Check whether the given instruction supports the swizzle and negate * combinations in the given source register. 
*/ -static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +static int +r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { - const struct swizzle_data* sd; - unsigned int relevant; - int j; + const struct swizzle_data *sd; + unsigned int relevant; + int j; - if (opcode == RC_OPCODE_KIL || - opcode == RC_OPCODE_TEX || - opcode == RC_OPCODE_TXB || - opcode == RC_OPCODE_TXP) { - if (reg.Abs || reg.Negate) - return 0; + if (opcode == RC_OPCODE_KIL || opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + if (reg.Abs || reg.Negate) + return 0; - for(j = 0; j < 4; ++j) { - unsigned int swz = GET_SWZ(reg.Swizzle, j); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz != j) - return 0; - } + for (j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; + } - return 1; - } + return 1; + } - relevant = 0; + relevant = 0; - for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) - relevant |= 1 << j; + for (j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return 0; + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; - sd = lookup_native_swizzle(reg.Swizzle); - if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) - return 0; + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) + return 0; - return 1; + return 1; } - -static void r300_swizzle_split( - struct rc_src_register src, unsigned int mask, - struct rc_swizzle_split * split) +static void +r300_swizzle_split(struct rc_src_register src, unsigned int mask, struct rc_swizzle_split *split) { - split->NumPhases = 0; + split->NumPhases = 0; - while(mask) { - unsigned int best_matchcount = 0; - unsigned int best_matchmask = 0; - int i, 
comp; + while (mask) { + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; - unsigned num_swizzles = src.File == RC_FILE_PRESUB ? num_presub_swizzles : num_native_swizzles; + unsigned num_swizzles = + src.File == RC_FILE_PRESUB ? num_presub_swizzles : num_native_swizzles; - for(i = 0; i < num_swizzles; ++i) { - const struct swizzle_data *sd = &native_swizzles[i]; - unsigned int matchcount = 0; - unsigned int matchmask = 0; - for(comp = 0; comp < 3; ++comp) { - unsigned int swz; - if (!GET_BIT(mask, comp)) - continue; - swz = GET_SWZ(src.Swizzle, comp); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz == GET_SWZ(sd->hash, comp)) { - /* check if the negate bit of current component - * is the same for already matched components */ - if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) - continue; + for (i = 0; i < num_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for (comp = 0; comp < 3; ++comp) { + unsigned int swz; + if (!GET_BIT(mask, comp)) + continue; + swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; - matchcount++; - matchmask |= 1 << comp; - } - } - if (matchcount > best_matchcount) { - best_matchcount = matchcount; - best_matchmask = matchmask; - if (matchmask == (mask & RC_MASK_XYZ)) - break; - } - } + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; + } + } - if (mask & RC_MASK_W) - best_matchmask |= RC_MASK_W; + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; - split->Phase[split->NumPhases++] = 
best_matchmask; - mask &= ~best_matchmask; - } + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; + } } -const struct rc_swizzle_caps r300_swizzle_caps = { - .IsNative = r300_swizzle_is_native, - .Split = r300_swizzle_split -}; - +const struct rc_swizzle_caps r300_swizzle_caps = {.IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split}; /** * Translate an RGB (XYZ) swizzle into the hardware code for the given * instruction source. */ -unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +unsigned int +r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { - const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + const struct swizzle_data *sd = lookup_native_swizzle(swizzle); - if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { - fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); - return 0; - } + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; + } - if (src == RC_PAIR_PRESUB_SRC) { - return sd->base + sd->srcp_stride; - } else { - return sd->base + src*sd->stride; - } + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src * sd->stride; + } } - /** * Translate an Alpha (W) swizzle into the hardware code for the given * instruction source. 
*/ -unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +unsigned int +r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { - unsigned int swz = GET_SWZ(swizzle, 0); - if (src == RC_PAIR_PRESUB_SRC) { - return R300_ALU_ARGA_SRCP_X + swz; - } - if (swz < 3) - return swz + 3*src; + unsigned int swz = GET_SWZ(swizzle, 0); + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swz; + } + if (swz < 3) + return swz + 3 * src; - switch(swz) { - case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; - case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; - default: return R300_ALU_ARGA_ONE; - } + switch (swz) { + case RC_SWIZZLE_W: + return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: + return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: + return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: + return R300_ALU_ARGA_HALF; + default: + return R300_ALU_ARGA_ONE; + } } diff --git a/src/gallium/drivers/r300/compiler/r300_nir.c b/src/gallium/drivers/r300/compiler/r300_nir.c index d041cebef06..54669bd2899 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir.c +++ b/src/gallium/drivers/r300/compiler/r300_nir.c @@ -11,7 +11,7 @@ bool r300_is_only_used_as_float(const nir_alu_instr *instr) { - nir_foreach_use(src, &instr->def) { + nir_foreach_use (src, &instr->def) { if (nir_src_is_if(src)) return false; @@ -29,7 +29,7 @@ r300_is_only_used_as_float(const nir_alu_instr *instr) return false; break; default: - break; + break; } const nir_op_info *info = &nir_op_infos[alu->op]; @@ -46,7 +46,7 @@ r300_is_only_used_as_float(const nir_alu_instr *instr) static unsigned char r300_should_vectorize_instr(const nir_instr *instr, const void *data) { - bool *too_many_ubos = (bool *) data; + bool *too_many_ubos = (bool *)data; if (instr->type != nir_instr_type_alu) return 0; @@ -88,7 +88,8 @@ r300_should_vectorize_instr(const nir_instr *instr, 
const void *data) * the constants later, we need to be extra careful with adding * new constants anyway. */ -static bool have_too_many_ubos(nir_shader *s, bool is_r500) +static bool +have_too_many_ubos(nir_shader *s, bool is_r500) { if (s->info.stage != MESA_SHADER_FRAGMENT) return false; @@ -96,9 +97,9 @@ static bool have_too_many_ubos(nir_shader *s, bool is_r500) if (is_r500) return false; - nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) { + nir_foreach_variable_with_modes (var, s, nir_var_mem_ubo) { int ubo = var->data.driver_location; - assert (ubo == 0); + assert(ubo == 0); unsigned size = glsl_get_explicit_size(var->interface_type, false); if (DIV_ROUND_UP(size, 16) > 32) @@ -208,8 +209,7 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false); if (is_r500) - nir_shader_intrinsics_pass(s, set_speculate, - nir_metadata_control_flow, NULL); + nir_shader_intrinsics_pass(s, set_speculate, nir_metadata_control_flow, NULL); NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 
8 : ~0, true, true); if (s->info.stage == MESA_SHADER_FRAGMENT) { NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs); @@ -221,10 +221,9 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) NIR_PASS(progress, s, nir_opt_loop); bool too_many_ubos = have_too_many_ubos(s, is_r500); - NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, - &too_many_ubos); + NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, &too_many_ubos); NIR_PASS(progress, s, nir_opt_undef); - if(!progress) + if (!progress) NIR_PASS(progress, s, nir_lower_undef_to_zero); NIR_PASS(progress, s, nir_opt_loop_unroll); @@ -246,11 +245,11 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) } while (progress); NIR_PASS_V(s, nir_lower_var_copies); - NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp, - NULL); + NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp, NULL); } -static char *r300_check_control_flow(nir_shader *s) +static char * +r300_check_control_flow(nir_shader *s) { nir_function_impl *impl = nir_shader_get_entrypoint(s); nir_block *first = nir_start_block(impl); @@ -258,12 +257,14 @@ static char *r300_check_control_flow(nir_shader *s) if (next) { switch (next->type) { - case nir_cf_node_if: - return "If/then statements not supported by R300/R400 shaders, should have been flattened by peephole_select."; - case nir_cf_node_loop: - return "Looping not supported R300/R400 shaders, all loops must be statically unrollable."; - default: - return "Unknown control flow type"; + case nir_cf_node_if: + return "If/then statements not supported by R300/R400 shaders, should have been " + "flattened by peephole_select."; + case nir_cf_node_loop: + return "Looping not supported R300/R400 shaders, all loops must be statically " + "unrollable."; + default: + return "Unknown control flow type"; } } @@ -283,10 +284,9 @@ r300_finalize_nir(struct pipe_screen *pscreen, void *nir) * because they're 
needed for YUV variant lowering. */ nir_remove_dead_derefs(s); - nir_foreach_uniform_variable_safe(var, s) { + nir_foreach_uniform_variable_safe (var, s) { if (var->data.mode == nir_var_uniform && - (glsl_type_get_image_count(var->type) || - glsl_type_get_sampler_count(var->type))) + (glsl_type_get_image_count(var->type) || glsl_type_get_sampler_count(var->type))) continue; exec_node_remove(&var->node); diff --git a/src/gallium/drivers/r300/compiler/r300_nir.h b/src/gallium/drivers/r300/compiler/r300_nir.h index c88d4a2d119..6eb205b0699 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir.h +++ b/src/gallium/drivers/r300/compiler/r300_nir.h @@ -8,13 +8,12 @@ #include -#include "pipe/p_screen.h" #include "compiler/nir/nir.h" +#include "pipe/p_screen.h" static inline bool -is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, - const uint8_t *swizzle) +is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) { nir_instr *parent = instr->src[src].src.ssa->parent_instr; if (parent->type != nir_instr_type_intrinsic) @@ -36,7 +35,7 @@ static inline bool is_not_used_in_single_if(const nir_alu_instr *instr) { unsigned if_uses = 0; - nir_foreach_use(src, &instr->def) { + nir_foreach_use (src, &instr->def) { if (nir_src_is_if(src)) if_uses++; else @@ -87,19 +86,19 @@ check_instr_and_src_value(nir_op op, nir_instr **instr, double value) unsigned i; for (i = 0; i <= 2; i++) { if (i == 2) { - return false; + return false; } nir_alu_src src = alu->src[i]; if (nir_src_is_const(src.src)) { - /* All components must be reading the same value. */ - for (unsigned j = 0; j < alu->def.num_components - 1; j++) { - if (src.swizzle[j] != src.swizzle[j + 1]) { - return false; - } - } - if (fabs(nir_src_comp_as_float(src.src, src.swizzle[0]) - value) < 1e-5) { - break; + /* All components must be reading the same value. 
*/ + for (unsigned j = 0; j < alu->def.num_components - 1; j++) { + if (src.swizzle[j] != src.swizzle[j + 1]) { + return false; } + } + if (fabs(nir_src_comp_as_float(src.src, src.swizzle[0]) - value) < 1e-5) { + break; + } } } *instr = alu->src[1 - i].src.ssa->parent_instr; diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c index d530ca08509..266823ea702 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -7,47 +7,48 @@ #include -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" -#include "radeon_program_alu.h" -#include "radeon_program_tex.h" -#include "radeon_rename_regs.h" -#include "radeon_remove_constants.h" -#include "radeon_variable.h" -#include "radeon_list.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "radeon_remove_constants.h" +#include "radeon_rename_regs.h" +#include "radeon_variable.h" - -static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) +static void +rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct rc_instruction *rci; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc; + struct rc_instruction *rci; - for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction * inst = &rci->U.I; - unsigned i; - const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; + rci = rci->Next) { + struct rc_sub_instruction *inst = &rci->U.I; + unsigned i; + const struct 
rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); - if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) - continue; + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; - if (inst->DstReg.WriteMask & RC_MASK_Z) { - inst->DstReg.WriteMask = RC_MASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } - if (!info->IsComponentwise) { - continue; - } + if (!info->IsComponentwise) { + continue; + } - for (i = 0; i < info->NumSrcRegs; i++) { - inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); - } - } + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); + } + } } /** @@ -66,100 +67,92 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) * rc_get_variables can't get properly readers for normal instructions if presubtract * is present (it works fine for pair instructions). */ -static void rc_convert_rgb_alpha(struct radeon_compiler *c, void *user) +static void +rc_convert_rgb_alpha(struct radeon_compiler *c, void *user) { - struct rc_list * variables; - struct rc_list * var_ptr; + struct rc_list *variables; + struct rc_list *var_ptr; - variables = rc_get_variables(c); + variables = rc_get_variables(c); - for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { - struct rc_variable * var = var_ptr->Item; + for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { + struct rc_variable *var = var_ptr->Item; - if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { - continue; - } + if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + continue; + } - /* Only rewrite scalar opcodes that are used separately for now. */ - if (var->Friend) - continue; + /* Only rewrite scalar opcodes that are used separately for now. 
*/ + if (var->Friend) + continue; - const struct rc_opcode_info * opcode = rc_get_opcode_info(var->Inst->U.I.Opcode); - if (opcode->IsStandardScalar && var->Dst.WriteMask != RC_MASK_W) { - unsigned index = rc_find_free_temporary(c); - rc_variable_change_dst(var, index, RC_MASK_W); - } - } + const struct rc_opcode_info *opcode = rc_get_opcode_info(var->Inst->U.I.Opcode); + if (opcode->IsStandardScalar && var->Dst.WriteMask != RC_MASK_W) { + unsigned index = rc_find_free_temporary(c); + rc_variable_change_dst(var, index, RC_MASK_W); + } + } } -void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +void +r3xx_compile_fragment_program(struct r300_fragment_program_compiler *c) { - int is_r500 = c->Base.is_r500; - int opt = !c->Base.disable_optimizations; - int alpha2one = c->state.alpha_to_one; + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + int alpha2one = c->state.alpha_to_one; + bool dbg = c->Base.Debug & RC_DBG_LOG; - /* Lists of instruction transformations. */ - struct radeon_program_transformation force_alpha_to_one[] = { - { &rc_force_output_alpha_to_one, c }, - { NULL, NULL } - }; + /* Lists of instruction transformations. 
*/ + struct radeon_program_transformation force_alpha_to_one[] = {{&rc_force_output_alpha_to_one, c}, + {NULL, NULL}}; - struct radeon_program_transformation rewrite_tex[] = { - { &radeonTransformTEX, c }, - { NULL, NULL } - }; + struct radeon_program_transformation rewrite_tex[] = {{&radeonTransformTEX, c}, {NULL, NULL}}; - struct radeon_program_transformation native_rewrite_r500[] = { - { &radeonTransformALU, NULL }, - { &radeonTransformDeriv, NULL }, - { NULL, NULL } - }; + struct radeon_program_transformation native_rewrite_r500[] = {{&radeonTransformALU, NULL}, + {&radeonTransformDeriv, NULL}, + {NULL, NULL}}; - struct radeon_program_transformation native_rewrite_r300[] = { - { &radeonTransformALU, NULL }, - { &radeonStubDeriv, NULL }, - { NULL, NULL } - }; + struct radeon_program_transformation native_rewrite_r300[] = {{&radeonTransformALU, NULL}, + {&radeonStubDeriv, NULL}, + {NULL, NULL}}; - struct radeon_program_transformation opt_presubtract[] = { - { &rc_opt_presubtract, NULL }, - { NULL, NULL } - }; + struct radeon_program_transformation opt_presubtract[] = {{&rc_opt_presubtract, NULL}, + {NULL, NULL}}; + /* List of compiler passes. 
*/ + /* clang-format off */ + struct radeon_compiler_pass fs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, + {"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one}, + {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, r500_transform_IF, NULL}, + {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, + {"deadcode", 1, opt, rc_dataflow_deadcode, NULL}, + {"convert rgb<->alpha", 1, opt, rc_convert_rgb_alpha, NULL}, + {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL}, + {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract}, + {"pair translate", 1, 1, rc_pair_translate, NULL}, + {"pair scheduling", 1, 1, rc_pair_schedule, &opt}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, &opt}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, + {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, + {"dump machine code", 0, is_r500 && dbg, r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && dbg, r300FragmentProgramDump, NULL}, + {NULL, 0, 0, NULL, NULL}}; + /* clang-format on */ - /* List of compiler passes. 
*/ - struct radeon_compiler_pass fs_list[] = { - /* NAME DUMP PREDICATE FUNCTION PARAM */ - {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, - {"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one}, - {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, - {"transform IF", 1, is_r500, r500_transform_IF, NULL}, - {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, - {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, - {"deadcode", 1, opt, rc_dataflow_deadcode, NULL}, - {"convert rgb<->alpha", 1, opt, rc_convert_rgb_alpha, NULL}, - {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, - {"dataflow optimize", 1, opt, rc_optimize, NULL}, - {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, - {"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract}, - {"pair translate", 1, 1, rc_pair_translate, NULL}, - {"pair scheduling", 1, 1, rc_pair_schedule, &opt}, - {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, - {"register allocation", 1, 1, rc_pair_regalloc, &opt}, - {"final code validation", 0, 1, rc_validate_final_shader, NULL}, - {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, - {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, - {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, - {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, - {NULL, 0, 0, NULL, NULL} - }; + c->Base.type = RC_FRAGMENT_PROGRAM; + c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; - c->Base.type = RC_FRAGMENT_PROGRAM; - c->Base.SwizzleCaps = c->Base.is_r500 ? 
&r500_swizzle_caps : &r300_swizzle_caps; + rc_run_compiler(&c->Base, fs_list); - rc_run_compiler(&c->Base, fs_list); - - rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c index 8c622401254..e522fcae614 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -12,12 +12,12 @@ #include "radeon_compiler_util.h" #include "radeon_dataflow.h" +#include "radeon_list.h" #include "radeon_program.h" #include "radeon_program_alu.h" -#include "radeon_swizzle.h" -#include "radeon_remove_constants.h" #include "radeon_regalloc.h" -#include "radeon_list.h" +#include "radeon_remove_constants.h" +#include "radeon_swizzle.h" #include "util/compiler.h" @@ -25,799 +25,778 @@ * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. 
*/ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(vpi->SrcReg[x].File), \ - RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), t_swizzle(y), t_swizzle(y), t_swizzle(y), \ + t_swizzle(y), t_src_class(vpi->SrcReg[x].File), RC_MASK_NONE) | \ + (vpi->SrcReg[x].RelAddr << 4)) - -static unsigned long t_dst_mask(unsigned int mask) +static unsigned long +t_dst_mask(unsigned int mask) { - /* RC_MASK_* is equivalent to VSF_FLAG_* */ - return mask & RC_MASK_XYZW; + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; } -static unsigned long t_dst_class(rc_register_file file) +static unsigned long +t_dst_class(rc_register_file file) { - switch (file) { - default: - fprintf(stderr, "%s: Bad register file %i\n", __func__, file); - FALLTHROUGH; - case RC_FILE_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case RC_FILE_OUTPUT: - return PVS_DST_REG_OUT; - case RC_FILE_ADDRESS: - return PVS_DST_REG_A0; - } + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __func__, file); + FALLTHROUGH; + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } } -static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct rc_dst_register *dst) +static unsigned long +t_dst_index(struct r300_vertex_program_code *vp, struct rc_dst_register *dst) { - if (dst->File == RC_FILE_OUTPUT) - return vp->outputs[dst->Index]; + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; - return dst->Index; + return dst->Index; } -static unsigned long t_src_class(rc_register_file file) +static unsigned long +t_src_class(rc_register_file file) { - switch (file) { - default: - fprintf(stderr, "%s: Bad register file %i\n", __func__, file); - FALLTHROUGH; - 
case RC_FILE_NONE: - case RC_FILE_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case RC_FILE_INPUT: - return PVS_SRC_REG_INPUT; - case RC_FILE_CONSTANT: - return PVS_SRC_REG_CONSTANT; - } + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __func__, file); + FALLTHROUGH; + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } } -static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +static int +t_src_conflict(struct rc_src_register a, struct rc_src_register b) { - unsigned long aclass = t_src_class(a.File); - unsigned long bclass = t_src_class(b.File); + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); - if (aclass != bclass) - return 0; - if (aclass == PVS_SRC_REG_TEMPORARY) - return 0; + if (aclass != bclass) + return 0; + if (aclass == PVS_SRC_REG_TEMPORARY) + return 0; - if (a.RelAddr || b.RelAddr) - return 1; - if (a.Index != b.Index) - return 1; + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; - return 0; + return 0; } -static inline unsigned long t_swizzle(unsigned int swizzle) +static inline unsigned long +t_swizzle(unsigned int swizzle) { - /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; } -static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct rc_src_register *src) +static unsigned long +t_src_index(struct r300_vertex_program_code *vp, struct rc_src_register *src) { - if (src->File == RC_FILE_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return 
src->Index; - } + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } } /* these two functions should probably be merged... */ -static unsigned long t_src(struct r300_vertex_program_code *vp, - struct rc_src_register *src) +static unsigned long +t_src(struct r300_vertex_program_code *vp, struct rc_src_register *src) { - /* src->Negate uses the RC_MASK_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | - (src->RelAddr << 4) | (src->Abs << 3); + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); } -static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct rc_src_register *src) +static unsigned long +t_src_scalar(struct r300_vertex_program_code *vp, struct rc_src_register *src) { - /* src->Negate uses the RC_MASK_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - unsigned int swz = rc_get_scalar_src_swz(src->Swizzle); + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
+ */ + unsigned int swz = rc_get_scalar_src_swz(src->Swizzle); - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(swz), - t_swizzle(swz), - t_swizzle(swz), - t_swizzle(swz), - t_src_class(src->File), - src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (src->RelAddr << 4) | (src->Abs << 3); + return PVS_SRC_OPERAND(t_src_index(vp, src), t_swizzle(swz), t_swizzle(swz), t_swizzle(swz), + t_swizzle(swz), t_src_class(src->File), + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4) | (src->Abs << 3); } -static int valid_dst(struct r300_vertex_program_code *vp, - struct rc_dst_register *dst) +static int +valid_dst(struct r300_vertex_program_code *vp, struct rc_dst_register *dst) { - if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { - return 0; - } else if (dst->File == RC_FILE_ADDRESS) { - assert(dst->Index == 0); - } + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } - return 1; + return 1; } -static void ei_vector1(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) +static void +ei_vector1(struct r300_vertex_program_code *vp, unsigned int hw_opcode, + struct rc_sub_instruction *vpi, unsigned int *inst) { - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = __CONST(0, RC_SWIZZLE_ZERO); -} - -static void ei_vector2(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == 
RC_SATURATE_ZERO_ONE); - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, RC_SWIZZLE_ZERO); -} - -static void ei_math1(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = __CONST(0, RC_SWIZZLE_ZERO); -} - -static void ei_cmp(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_COND_MUX_GTE, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - - /* Arguments with constant swizzles still count as a unique - * temporary, so we should make sure these arguments share a - * register index with one of the other arguments. */ - for (unsigned i = 0; i < 3; i++) { - unsigned j = (i + 1) % 3; - if (vpi->SrcReg[i].File == RC_FILE_NONE && - (vpi->SrcReg[j].File == RC_FILE_NONE || - vpi->SrcReg[j].File == RC_FILE_TEMPORARY)) { - vpi->SrcReg[i].Index = vpi->SrcReg[j].Index; - break; - } - } - - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = t_src(vp, &vpi->SrcReg[2]); - inst[3] = t_src(vp, &vpi->SrcReg[1]); -} - -static void ei_lit(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - /* NOTE: Users swizzling might not work. 
*/ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); -} - -static void ei_mad(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - unsigned int i; - /* Remarks about hardware limitations of MAD - * (please preserve this comment, as this information is _NOT_ - * in the documentation provided by AMD). - * - * As described in the documentation, MAD with three unique temporary - * source registers requires the use of the macro version. - * - * However (and this is not mentioned in the documentation), apparently - * the macro version is _NOT_ a full superset of the normal version. - * In particular, the macro version does not always work when relative - * addressing is used in the source operands. - * - * This limitation caused incorrect rendering in Sauerbraten's OpenGL - * assembly shader path when using medium quality animations - * (i.e. 
animations with matrix blending instead of quaternion blending). - * - * Unfortunately, I (nha) have been unable to extract a Piglit regression - * test for this issue - for some reason, it is possible to have vertex - * programs whose prefix is *exactly* the same as the prefix of the - * offending program in Sauerbraten up to the offending instruction - * without causing any trouble. - * - * Bottom line: Only use the macro version only when really necessary; - * according to AMD docs, this should improve performance by one clock - * as a nice side bonus. - */ - if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && - vpi->SrcReg[1].File == RC_FILE_TEMPORARY && - vpi->SrcReg[2].File == RC_FILE_TEMPORARY && - vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && - vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && - vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - 0, - 1, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - } else { - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void +ei_vector2(struct r300_vertex_program_code *vp, unsigned int hw_opcode, + struct rc_sub_instruction *vpi, unsigned int *inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 0, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void 
+ei_math1(struct r300_vertex_program_code *vp, unsigned int hw_opcode, + struct rc_sub_instruction *vpi, unsigned int *inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, 1, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void +ei_cmp(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_COND_MUX_GTE, 0, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - /* Arguments with constant swizzles still count as a unique - * temporary, so we should make sure these arguments share a - * register index with one of the other arguments. */ - for (i = 0; i < 3; i++) { - unsigned int j; - if (vpi->SrcReg[i].File != RC_FILE_NONE) - continue; + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. 
*/ + for (unsigned i = 0; i < 3; i++) { + unsigned j = (i + 1) % 3; + if (vpi->SrcReg[i].File == RC_FILE_NONE && + (vpi->SrcReg[j].File == RC_FILE_NONE || vpi->SrcReg[j].File == RC_FILE_TEMPORARY)) { + vpi->SrcReg[i].Index = vpi->SrcReg[j].Index; + break; + } + } - for (j = 0; j < 3; j++) { - if (i != j) { - vpi->SrcReg[i].Index = - vpi->SrcReg[j].Index; - break; - } - } - } - } - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = t_src(vp, &vpi->SrcReg[2]); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[2]); + inst[3] = t_src(vp, &vpi->SrcReg[1]); } -static void ei_pow(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) +static void +ei_lit(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst) { - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File), - vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); + // LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, 1, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? 
RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); } -static void translate_vertex_program(struct radeon_compiler *c, void *user) +static void +ei_mad(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst) { - struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; - struct rc_instruction *rci; + unsigned int i; + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). 
+ * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, 0, 1, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, 0, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); - unsigned loops[R500_PVS_MAX_LOOP_DEPTH] = {}; - unsigned loop_depth = 0; - bool last_input_read_at_loop_end = false; - bool last_pos_write_at_loop_end = false; + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. 
*/ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; - compiler->code->pos_end = 0; /* Not supported yet */ - compiler->code->length = 0; - compiler->code->num_temporaries = 0; - compiler->code->last_input_read = 0; - compiler->code->last_pos_write = 0; + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = vpi->SrcReg[j].Index; + break; + } + } + } + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} - compiler->SetHwInputOutput(compiler); +static void +ei_pow(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, 1, 0, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File), + vpi->SaturateMode == RC_SATURATE_ZERO_ONE); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} - for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction *vpi = &rci->U.I; - unsigned int *inst = compiler->code->body.d + compiler->code->length; - const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); +static void +translate_vertex_program(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler *)c; + struct rc_instruction *rci; - /* Skip instructions writing to non-existing destination */ - if (!valid_dst(compiler->code, &vpi->DstReg)) - continue; + unsigned loops[R500_PVS_MAX_LOOP_DEPTH] = {}; + unsigned loop_depth = 0; + bool last_input_read_at_loop_end = false; + bool last_pos_write_at_loop_end = false; - if (info->HasDstReg) { - /* Neither is Saturate. 
*/ - if (vpi->SaturateMode != RC_SATURATE_NONE && !c->is_r500) { - rc_error(&compiler->Base, "Vertex program does not support the Saturate " - "modifier (yet).\n"); - } - } + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + compiler->code->num_temporaries = 0; + compiler->code->last_input_read = 0; + compiler->code->last_pos_write = 0; - if (compiler->code->length >= c->max_alu_insts * 4) { - rc_error(&compiler->Base, "Vertex program has too many instructions\n"); - return; - } + compiler->SetHwInputOutput(compiler); - assert(compiler->Base.is_r500 || - (vpi->Opcode != RC_OPCODE_SEQ && - vpi->Opcode != RC_OPCODE_SNE)); + for (rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; + rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); - switch (vpi->Opcode) { - case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case RC_OPCODE_ARR: ei_vector1(compiler->code, VE_FLT2FIX_DX_RND, vpi, inst); break; - case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; - case RC_OPCODE_CMP: ei_cmp(compiler->code, vpi, inst); break; - case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case RC_OPCODE_LOG: 
ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; - case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; - case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; - case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; - case RC_OPCODE_BGNLOOP: - { - if ((!compiler->Base.is_r500 - && loop_depth >= R300_VS_MAX_LOOP_DEPTH) - || loop_depth >= R500_PVS_MAX_LOOP_DEPTH) { - rc_error(&compiler->Base, - "Loops are nested too deep."); - return; - } - loops[loop_depth++] = ((compiler->code->length)/ 4) + 1; - break; - } - case RC_OPCODE_ENDLOOP: - { - unsigned int act_addr; - unsigned int last_addr; - unsigned int ret_addr; + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; - if (loop_depth == 1 && last_input_read_at_loop_end) { - compiler->code->last_input_read = compiler->code->length / 4; - last_input_read_at_loop_end = false; - } - if (loop_depth == 1 && last_pos_write_at_loop_end) { - compiler->code->last_pos_write = compiler->code->length / 4; - last_pos_write_at_loop_end = false; - } + if (info->HasDstReg) { + /* Neither is Saturate. 
*/ + if (vpi->SaturateMode != RC_SATURATE_NONE && !c->is_r500) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } - ret_addr = loops[--loop_depth]; - act_addr = ret_addr - 1; - last_addr = (compiler->code->length / 4) - 1; + if (compiler->code->length >= c->max_alu_insts * 4) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } - if (loop_depth >= R300_VS_MAX_FC_OPS) { - rc_error(&compiler->Base, - "Too many flow control instructions."); - return; - } - /* Maximum of R500_PVS_FC_LOOP_CNT_JMP_INST is 0xff, here - * we reduce it to half to avoid occasional hangs on RV516 - * and downclocked RV530. - */ - if (compiler->Base.is_r500) { - compiler->code->fc_op_addrs.r500 - [compiler->code->num_fc_ops].lw = - R500_PVS_FC_ACT_ADRS(act_addr) - | R500_PVS_FC_LOOP_CNT_JMP_INST(0x0080) - ; - compiler->code->fc_op_addrs.r500 - [compiler->code->num_fc_ops].uw = - R500_PVS_FC_LAST_INST(last_addr) - | R500_PVS_FC_RTN_INST(ret_addr) - ; - } else { - compiler->code->fc_op_addrs.r300 - [compiler->code->num_fc_ops] = - R300_PVS_FC_ACT_ADRS(act_addr) - | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) - | R300_PVS_FC_LAST_INST(last_addr) - | R300_PVS_FC_RTN_INST(ret_addr) - ; - } - compiler->code->fc_loop_index[compiler->code->num_fc_ops] = - R300_PVS_FC_LOOP_INIT_VAL(0x0) - | R300_PVS_FC_LOOP_STEP_VAL(0x1) - ; - compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( - compiler->code->num_fc_ops); - compiler->code->num_fc_ops++; + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && vpi->Opcode != RC_OPCODE_SNE)); - break; - } + switch (vpi->Opcode) { + case RC_OPCODE_ADD: + ei_vector2(compiler->code, VE_ADD, vpi, inst); + break; + case RC_OPCODE_ARL: + ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); + break; + case RC_OPCODE_ARR: + ei_vector1(compiler->code, VE_FLT2FIX_DX_RND, vpi, inst); + break; + case RC_OPCODE_COS: + ei_math1(compiler->code, ME_COS, vpi, inst); + break; + case 
RC_OPCODE_CMP: + ei_cmp(compiler->code, vpi, inst); + break; + case RC_OPCODE_DP4: + ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); + break; + case RC_OPCODE_DST: + ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); + break; + case RC_OPCODE_EX2: + ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); + break; + case RC_OPCODE_EXP: + ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); + break; + case RC_OPCODE_FRC: + ei_vector1(compiler->code, VE_FRACTION, vpi, inst); + break; + case RC_OPCODE_LG2: + ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); + break; + case RC_OPCODE_LIT: + ei_lit(compiler->code, vpi, inst); + break; + case RC_OPCODE_LOG: + ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); + break; + case RC_OPCODE_MAD: + ei_mad(compiler->code, vpi, inst); + break; + case RC_OPCODE_MAX: + ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); + break; + case RC_OPCODE_MIN: + ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); + break; + case RC_OPCODE_MOV: + ei_vector1(compiler->code, VE_ADD, vpi, inst); + break; + case RC_OPCODE_MUL: + ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); + break; + case RC_OPCODE_POW: + ei_pow(compiler->code, vpi, inst); + break; + case RC_OPCODE_RCP: + ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); + break; + case RC_OPCODE_RSQ: + ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); + break; + case RC_OPCODE_SEQ: + ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); + break; + case RC_OPCODE_SGE: + ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); + break; + case RC_OPCODE_SIN: + ei_math1(compiler->code, ME_SIN, vpi, inst); + break; + case RC_OPCODE_SLT: + ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); + break; + case RC_OPCODE_SNE: + ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); + break; + case RC_OPCODE_BGNLOOP: { + if ((!compiler->Base.is_r500 && loop_depth >= R300_VS_MAX_LOOP_DEPTH) || + loop_depth >= R500_PVS_MAX_LOOP_DEPTH) { + 
rc_error(&compiler->Base, "Loops are nested too deep."); + return; + } + loops[loop_depth++] = ((compiler->code->length) / 4) + 1; + break; + } + case RC_OPCODE_ENDLOOP: { + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; - case RC_ME_PRED_SET_CLR: - ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst); - break; + if (loop_depth == 1 && last_input_read_at_loop_end) { + compiler->code->last_input_read = compiler->code->length / 4; + last_input_read_at_loop_end = false; + } + if (loop_depth == 1 && last_pos_write_at_loop_end) { + compiler->code->last_pos_write = compiler->code->length / 4; + last_pos_write_at_loop_end = false; + } - case RC_ME_PRED_SET_INV: - ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst); - break; + ret_addr = loops[--loop_depth]; + act_addr = ret_addr - 1; + last_addr = (compiler->code->length / 4) - 1; - case RC_ME_PRED_SET_POP: - ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst); - break; + if (loop_depth >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, "Too many flow control instructions."); + return; + } + /* Maximum of R500_PVS_FC_LOOP_CNT_JMP_INST is 0xff, here + * we reduce it to half to avoid occasional hangs on RV516 + * and downclocked RV530. 
+ */ + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500[compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) | R500_PVS_FC_LOOP_CNT_JMP_INST(0x0080); + compiler->code->fc_op_addrs.r500[compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) | R500_PVS_FC_RTN_INST(ret_addr); + } else { + compiler->code->fc_op_addrs.r300[compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) | + R300_PVS_FC_LAST_INST(last_addr) | R300_PVS_FC_RTN_INST(ret_addr); + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) | R300_PVS_FC_LOOP_STEP_VAL(0x1); + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; - case RC_ME_PRED_SET_RESTORE: - ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst); - break; + break; + } - case RC_ME_PRED_SEQ: - ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst); - break; + case RC_ME_PRED_SET_CLR: + ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst); + break; - case RC_ME_PRED_SNEQ: - ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst); - break; + case RC_ME_PRED_SET_INV: + ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst); + break; - case RC_VE_PRED_SNEQ_PUSH: - ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH, - vpi, inst); - break; + case RC_ME_PRED_SET_POP: + ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst); + break; - default: - rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); - return; - } + case RC_ME_PRED_SET_RESTORE: + ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst); + break; - if (vpi->DstReg.Pred != RC_PRED_DISABLED) { - inst[0] |= (PVS_DST_PRED_ENABLE_MASK - << PVS_DST_PRED_ENABLE_SHIFT); - if (vpi->DstReg.Pred == RC_PRED_SET) { - inst[0] |= (PVS_DST_PRED_SENSE_MASK - << PVS_DST_PRED_SENSE_SHIFT); - } - } + case RC_ME_PRED_SEQ: + ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst); + break; - /* Update the number of 
temporaries. */ - if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && - vpi->DstReg.Index >= compiler->code->num_temporaries) - compiler->code->num_temporaries = vpi->DstReg.Index + 1; + case RC_ME_PRED_SNEQ: + ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst); + break; - /* last instruction that writes position */ - if (info->HasDstReg && vpi->DstReg.File == RC_FILE_OUTPUT && - t_dst_index(compiler->code, &vpi->DstReg) == 0) { - if (loop_depth == 0) - compiler->code->last_pos_write = compiler->code->length / 4; - else - last_pos_write_at_loop_end = true; - } + case RC_VE_PRED_SNEQ_PUSH: + ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH, vpi, inst); + break; - for (unsigned i = 0; i < info->NumSrcRegs; i++) { - if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && - vpi->SrcReg[i].Index >= compiler->code->num_temporaries) - compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; - if (vpi->SrcReg[i].File == RC_FILE_INPUT) { - if (loop_depth == 0) - compiler->code->last_input_read = compiler->code->length / 4; - else - last_input_read_at_loop_end = true; - } + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); + return; + } - } + if (vpi->DstReg.Pred != RC_PRED_DISABLED) { + inst[0] |= (PVS_DST_PRED_ENABLE_MASK << PVS_DST_PRED_ENABLE_SHIFT); + if (vpi->DstReg.Pred == RC_PRED_SET) { + inst[0] |= (PVS_DST_PRED_SENSE_MASK << PVS_DST_PRED_SENSE_SHIFT); + } + } + /* Update the number of temporaries. 
*/ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && + vpi->DstReg.Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->DstReg.Index + 1; - if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { - rc_error(&compiler->Base, "Too many temporaries.\n"); - return; - } + /* last instruction that writes position */ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_OUTPUT && + t_dst_index(compiler->code, &vpi->DstReg) == 0) { + if (loop_depth == 0) + compiler->code->last_pos_write = compiler->code->length / 4; + else + last_pos_write_at_loop_end = true; + } - compiler->code->length += 4; + for (unsigned i = 0; i < info->NumSrcRegs; i++) { + if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && + vpi->SrcReg[i].Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; + if (vpi->SrcReg[i].File == RC_FILE_INPUT) { + if (loop_depth == 0) + compiler->code->last_input_read = compiler->code->length / 4; + else + last_input_read_at_loop_end = true; + } + } - if (compiler->Base.Error) - return; - } + if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "Too many temporaries.\n"); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } } struct temporary_allocation { - unsigned int Allocated:1; - unsigned int HwTemp:15; - struct rc_instruction * LastRead; + unsigned int Allocated : 1; + unsigned int HwTemp : 15; + struct rc_instruction *LastRead; }; -static int get_reg(struct radeon_compiler *c, struct temporary_allocation *ta, bool *hwtemps, - unsigned int orig) +static int +get_reg(struct radeon_compiler *c, struct temporary_allocation *ta, bool *hwtemps, + unsigned int orig) { - if (!ta[orig].Allocated) { - int j; - for (j = 0; j < c->max_temp_regs; ++j) - { - if (!hwtemps[j]) - break; - } - ta[orig].Allocated = 1; - ta[orig].HwTemp = j; - hwtemps[ta[orig].HwTemp] = true; - } + if 
(!ta[orig].Allocated) { + int j; + for (j = 0; j < c->max_temp_regs; ++j) { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = true; + } - return ta[orig].HwTemp; + return ta[orig].HwTemp; } -static void allocate_temporary_registers(struct radeon_compiler *c, void *user) +static void +allocate_temporary_registers(struct radeon_compiler *c, void *user) { - unsigned int node_count, node_index; - struct ra_class ** node_classes; - struct rc_list * var_ptr; - struct rc_list * variables; - struct ra_graph * graph; - const struct rc_regalloc_state *ra_state = c->regalloc_state; + unsigned int node_count, node_index; + struct ra_class **node_classes; + struct rc_list *var_ptr; + struct rc_list *variables; + struct ra_graph *graph; + const struct rc_regalloc_state *ra_state = c->regalloc_state; - rc_recompute_ips(c); + rc_recompute_ips(c); - /* Get list of program variables */ - variables = rc_get_variables(c); - node_count = rc_list_count(variables); - node_classes = memory_pool_malloc(&c->Pool, - node_count * sizeof(struct ra_class *)); + /* Get list of program variables */ + variables = rc_get_variables(c); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&c->Pool, node_count * sizeof(struct ra_class *)); - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - unsigned int class_index = 0; - int index; - /* Compute the live intervals */ - rc_variable_compute_live_intervals(var_ptr->Item); - unsigned int writemask = rc_variable_writemask_sum(var_ptr->Item); - index = rc_find_class(c->regalloc_state->class_list, writemask, 6); - if (index > -1) { - class_index = c->regalloc_state->class_list[index].ID; - } else { - rc_error(c, - "Could not find class for index=%u mask=%u\n", - ((struct rc_variable *)var_ptr->Item)->Dst.Index, writemask); - } - node_classes[node_index] = ra_state->classes[class_index]; - } + for (var_ptr = variables, node_index = 
0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index = 0; + int index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + unsigned int writemask = rc_variable_writemask_sum(var_ptr->Item); + index = rc_find_class(c->regalloc_state->class_list, writemask, 6); + if (index > -1) { + class_index = c->regalloc_state->class_list[index].ID; + } else { + rc_error(c, "Could not find class for index=%u mask=%u\n", + ((struct rc_variable *)var_ptr->Item)->Dst.Index, writemask); + } + node_classes[node_index] = ra_state->classes[class_index]; + } - graph = ra_alloc_interference_graph(ra_state->regs, node_count); + graph = ra_alloc_interference_graph(ra_state->regs, node_count); - for (node_index = 0; node_index < node_count; node_index++) { - ra_set_node_class(graph, node_index, node_classes[node_index]); - } + for (node_index = 0; node_index < node_count; node_index++) { + ra_set_node_class(graph, node_index, node_classes[node_index]); + } - rc_build_interference_graph(graph, variables); + rc_build_interference_graph(graph, variables); - if (!ra_allocate(graph)) { - rc_error(c, "Ran out of hardware temporaries\n"); - ralloc_free(graph); - return; - } + if (!ra_allocate(graph)) { + rc_error(c, "Ran out of hardware temporaries\n"); + ralloc_free(graph); + return; + } - /* Rewrite the registers */ - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - int reg = ra_get_node_reg(graph, node_index); - unsigned int writemask = reg_get_writemask(reg); - unsigned int index = reg_get_index(reg); - struct rc_variable * var = var_ptr->Item; + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable *var = var_ptr->Item; - rc_variable_change_dst(var, index, 
writemask); - } + rc_variable_change_dst(var, index, writemask); + } - ralloc_free(graph); + ralloc_free(graph); } /** * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ -static int transform_source_conflicts( - struct radeon_compiler *c, - struct rc_instruction* inst, - void* unused) +static int +transform_source_conflicts(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->NumSrcRegs == 3) { - if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) - || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { - int tmpreg = rc_find_free_temporary(c); - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = tmpreg; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; - inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst_mov->U.I.SrcReg[0].Negate = 0; - inst_mov->U.I.SrcReg[0].Abs = 0; + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) || + t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst_mov->U.I.SrcReg[0].Negate = 0; + inst_mov->U.I.SrcReg[0].Abs = 0; - inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[2].Index = tmpreg; - inst->U.I.SrcReg[2].RelAddr = false; - } - } + inst->U.I.SrcReg[2].File = 
RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + inst->U.I.SrcReg[2].RelAddr = false; + } + } - if (opcode->NumSrcRegs >= 2) { - if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { - int tmpreg = rc_find_free_temporary(c); - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = tmpreg; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; - inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst_mov->U.I.SrcReg[0].Negate = 0; - inst_mov->U.I.SrcReg[0].Abs = 0; + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + inst_mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst_mov->U.I.SrcReg[0].Negate = 0; + inst_mov->U.I.SrcReg[0].Abs = 0; - inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[1].Index = tmpreg; - inst->U.I.SrcReg[1].RelAddr = false; - } - } + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + inst->U.I.SrcReg[1].RelAddr = false; + } + } - return 1; + return 1; } -static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) +static void +rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) { - struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; - int i; + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler *)c; + int i; - for(i = 0; i < 32; ++i) { - if ((compiler->RequiredOutputs & (1U << i)) && - !(compiler->Base.Program.OutputsWritten & (1U << i))) { - struct rc_instruction * inst = 
rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->U.I.Opcode = RC_OPCODE_MOV; + for (i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1U << i)) && + !(compiler->Base.Program.OutputsWritten & (1U << i))) { + struct rc_instruction *inst = + rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = i; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; - inst->U.I.SrcReg[0].Index = 0; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - compiler->Base.Program.OutputsWritten |= 1U << i; - } - } + compiler->Base.Program.OutputsWritten |= 1U << i; + } + } } -static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +static int +swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { - (void) opcode; - (void) reg; + (void)opcode; + (void)reg; - return 1; + return 1; } const struct rc_swizzle_caps r300_vertprog_swizzle_caps = { - .IsNative = &swizzle_is_native, - .Split = NULL /* should never be called */ + .IsNative = &swizzle_is_native, .Split = NULL /* should never be called */ }; -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) +void +r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { - int is_r500 = c->Base.is_r500; - int opt = !c->Base.disable_optimizations; + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + bool debug = c->Base.Debug & RC_DBG_LOG; - /* Lists of instruction transformations. 
*/ - struct radeon_program_transformation alu_rewrite[] = { - { &r300_transform_vertex_alu, NULL }, - { NULL, NULL } - }; + /* Lists of instruction transformations. */ + struct radeon_program_transformation alu_rewrite[] = {{&r300_transform_vertex_alu, NULL}, + {NULL, NULL}}; - struct radeon_program_transformation resolve_src_conflicts[] = { - { &transform_source_conflicts, NULL }, - { NULL, NULL } - }; + struct radeon_program_transformation resolve_src_conflicts[] = { + {&transform_source_conflicts, NULL}, + {NULL, NULL}}; - /* List of compiler passes. */ - struct radeon_compiler_pass vs_list[] = { - /* NAME DUMP PREDICATE FUNCTION PARAM */ - {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, - {"native rewrite", 1, 1, rc_local_transform, alu_rewrite}, - {"unused channels", 1, opt, rc_mark_unused_channels, NULL}, - {"dataflow optimize", 1, opt, rc_optimize, NULL}, - {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, - /* This pass must be done after optimizations. */ - {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL}, - {"final code validation", 0, 1, rc_validate_final_shader, NULL}, - {"machine code generation", 0, 1, translate_vertex_program, NULL}, - {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, - {NULL, 0, 0, NULL, NULL} - }; + /* List of compiler passes. 
*/ + struct radeon_compiler_pass vs_list[] = { + /* clang-format off */ + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, + {"native rewrite", 1, 1, rc_local_transform, alu_rewrite}, + {"unused channels", 1, opt, rc_mark_unused_channels, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + /* This pass must be done after optimizations. */ + {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, + {"register allocation", 1, opt, allocate_temporary_registers, NULL}, + {"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, 1, translate_vertex_program, NULL}, + {"dump machine code", 0, debug, r300_vertex_program_dump, NULL}, + {NULL, 0, 0, NULL, NULL}}; + /* clang-format on */ - c->Base.type = RC_VERTEX_PROGRAM; - c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + c->Base.type = RC_VERTEX_PROGRAM; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - rc_run_compiler(&c->Base, vs_list); + rc_run_compiler(&c->Base, vs_list); - c->code->InputsRead = c->Base.Program.InputsRead; - c->code->OutputsWritten = c->Base.Program.OutputsWritten; - rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c index a3fdb6106ff..79e27fa1e43 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c @@ -3,197 +3,200 @@ * SPDX-License-Identifier: MIT */ -#include "radeon_compiler.h" -#include "radeon_code.h" #include 
"r300_reg.h" +#include "radeon_code.h" +#include "radeon_compiler.h" #include -static const char* r300_vs_ve_ops[] = { - /* R300 vector ops */ - " VE_NO_OP", - " VE_DOT_PRODUCT", - " VE_MULTIPLY", - " VE_ADD", - " VE_MULTIPLY_ADD", - " VE_DISTANCE_FACTOR", - " VE_FRACTION", - " VE_MAXIMUM", - " VE_MINIMUM", - "VE_SET_GREATER_THAN_EQUAL", - " VE_SET_LESS_THAN", - " VE_MULTIPLYX2_ADD", - " VE_MULTIPLY_CLAMP", - " VE_FLT2FIX_DX", - " VE_FLT2FIX_DX_RND", - /* R500 vector ops */ - " VE_PRED_SET_EQ_PUSH", - " VE_PRED_SET_GT_PUSH", - " VE_PRED_SET_GTE_PUSH", - " VE_PRED_SET_NEQ_PUSH", - " VE_COND_WRITE_EQ", - " VE_COND_WRITE_GT", - " VE_COND_WRITE_GTE", - " VE_COND_WRITE_NEQ", - " VE_COND_MUX_EQ", - " VE_COND_MUX_GT", - " VE_COND_MUX_GTE", - " VE_SET_GREATER_THAN", - " VE_SET_EQUAL", - " VE_SET_NOT_EQUAL", - " (reserved)", - " (reserved)", - " (reserved)", +static const char *r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", }; -static const char* r300_vs_me_ops[] = { - /* R300 math ops */ - " ME_NO_OP", - " ME_EXP_BASE2_DX", - " ME_LOG_BASE2_DX", - " ME_EXP_BASEE_FF", - " ME_LIGHT_COEFF_DX", - " ME_POWER_FUNC_FF", - " ME_RECIP_DX", - " ME_RECIP_FF", - " ME_RECIP_SQRT_DX", - " ME_RECIP_SQRT_FF", - " ME_MULTIPLY", - " ME_EXP_BASE2_FULL_DX", - " ME_LOG_BASE2_FULL_DX", - " 
ME_POWER_FUNC_FF_CLAMP_B", - "ME_POWER_FUNC_FF_CLAMP_B1", - "ME_POWER_FUNC_FF_CLAMP_01", - " ME_SIN", - " ME_COS", - /* R500 math ops */ - " ME_LOG_BASE2_IEEE", - " ME_RECIP_IEEE", - " ME_RECIP_SQRT_IEEE", - " ME_PRED_SET_EQ", - " ME_PRED_SET_GT", - " ME_PRED_SET_GTE", - " ME_PRED_SET_NEQ", - " ME_PRED_SET_CLR", - " ME_PRED_SET_INV", - " ME_PRED_SET_POP", - " ME_PRED_SET_RESTORE", - " (reserved)", - " (reserved)", - " (reserved)", +static const char *r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", }; /* XXX refactor to avoid clashing symbols */ -static const char* r300_vs_src_debug[] = { - "t", - "i", - "c", - "a", +static const char *r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", }; -static const char* r300_vs_dst_debug[] = { - "t", - "a0", - "o", - "ox", - "a", - "i", - "u", - "u", +static const char *r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", }; -static const char* r300_vs_swiz_debug[] = { - "X", - "Y", - "Z", - "W", - "0", - "1", - "U", - "U", +static const char *r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", }; - -static void r300_vs_op_dump(uint32_t op) +static void +r300_vs_op_dump(uint32_t op) { - fprintf(stderr, " dst: %d%s op: ", - (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 
8) & 0x7]); - if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { - fprintf(stderr, "PRED %u", - (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); - } - if (op & 0x80) { - if (op & 0x1) { - fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); - } else { - fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); - } - } else if (op & 0x40) { - fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); - } else { - fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); - } + fprintf(stderr, " dst: %d%s op: ", (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } } -static void r300_vs_src_dump(uint32_t src) +static void +r300_vs_src_dump(uint32_t src) { - fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], - src & (1 << 25) ? "-" : " ", - r300_vs_swiz_debug[(src >> 13) & 0x7], - src & (1 << 26) ? "-" : " ", - r300_vs_swiz_debug[(src >> 16) & 0x7], - src & (1 << 27) ? "-" : " ", - r300_vs_swiz_debug[(src >> 19) & 0x7], - src & (1 << 28) ? "-" : " ", - r300_vs_swiz_debug[(src >> 22) & 0x7]); + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", (src >> 5) & 0xff, + r300_vs_src_debug[src & 0x3], src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], src & (1 << 28) ? 
"-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); } -void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) +void +r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) { - struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; - struct r300_vertex_program_code * vs = c->code; - unsigned instrcount = vs->length / 4; - unsigned i; + struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler *)compiler; + struct r300_vertex_program_code *vs = c->code; + unsigned instrcount = vs->length / 4; + unsigned i; - fprintf(stderr, "Final vertex program code:\n"); + fprintf(stderr, "Final vertex program code:\n"); - for(i = 0; i < instrcount; i++) { - unsigned offset = i*4; - unsigned src; + for (i = 0; i < instrcount; i++) { + unsigned offset = i * 4; + unsigned src; - fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); - r300_vs_op_dump(vs->body.d[offset]); + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); - for(src = 0; src < 3; ++src) { - fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); - r300_vs_src_dump(vs->body.d[offset+1+src]); - } - } + for (src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset + 1 + src]); + r300_vs_src_dump(vs->body.d[offset + 1 + src]); + } + } - fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); - for(i = 0; i < vs->num_fc_ops; i++) { - unsigned is_loop = 0; - switch((vs->fc_ops >> (i * 2)) & 0x3 ) { - case 0: fprintf(stderr, "NOP"); break; - case 1: fprintf(stderr, "JUMP"); break; - case 2: fprintf(stderr, "LOOP"); is_loop = 1; break; - case 3: fprintf(stderr, "JSR"); break; - } - if (c->Base.is_r500) { - fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x " - "loop data->0x%08x\n", - vs->fc_op_addrs.r500[i].uw, - vs->fc_op_addrs.r500[i].lw, - vs->fc_loop_index[i]); - if (is_loop) { - fprintf(stderr, "Before = %u First = %u Last = %u\n", - 
vs->fc_op_addrs.r500[i].lw & 0xffff, - (vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff, - vs->fc_op_addrs.r500[i].uw & 0xffff); - } - } else { - fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); - } - } + fprintf(stderr, "Flow Control Ops: 0x%08x\n", vs->fc_ops); + for (i = 0; i < vs->num_fc_ops; i++) { + unsigned is_loop = 0; + switch ((vs->fc_ops >> (i * 2)) & 0x3) { + case 0: + fprintf(stderr, "NOP"); + break; + case 1: + fprintf(stderr, "JUMP"); + break; + case 2: + fprintf(stderr, "LOOP"); + is_loop = 1; + break; + case 3: + fprintf(stderr, "JSR"); + break; + } + if (c->Base.is_r500) { + fprintf(stderr, + ": uw-> 0x%08x lw-> 0x%08x " + "loop data->0x%08x\n", + vs->fc_op_addrs.r500[i].uw, vs->fc_op_addrs.r500[i].lw, vs->fc_loop_index[i]); + if (is_loop) { + fprintf( + stderr, "Before = %u First = %u Last = %u\n", vs->fc_op_addrs.r500[i].lw & 0xffff, + (vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff, vs->fc_op_addrs.r500[i].uw & 0xffff); + } + } else { + fprintf(stderr, ": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } } diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c index bb8268b2235..6dccb31f668 100644 --- a/src/gallium/drivers/r300/compiler/r500_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -7,208 +7,194 @@ #include +#include "r300_reg.h" #include "radeon_compiler_util.h" #include "radeon_list.h" #include "radeon_variable.h" -#include "r300_reg.h" #include "util/compiler.h" /** * Rewrite IF instructions to use the ALU result special register. 
*/ -static void r500_transform_IF_instr( - struct radeon_compiler * c, - struct rc_instruction * inst_if, - struct rc_list * var_list) +static void +r500_transform_IF_instr(struct radeon_compiler *c, struct rc_instruction *inst_if, + struct rc_list *var_list) { - struct rc_variable * writer; - struct rc_list * writer_list, * list_ptr; - unsigned int generic_if = 0; - unsigned int alu_chan; + struct rc_variable *writer; + struct rc_list *writer_list, *list_ptr; + unsigned int generic_if = 0; + unsigned int alu_chan; - writer_list = rc_variable_list_get_writers( - var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); - if (!writer_list) { - generic_if = 1; - } else { + writer_list = rc_variable_list_get_writers(var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { - /* Make sure it is safe for the writers to write to - * ALU Result */ - for (list_ptr = writer_list; list_ptr; - list_ptr = list_ptr->Next) { - struct rc_instruction * inst; - writer = list_ptr->Item; - /* We are going to modify the destination register - * of writer, so if it has a reader other than - * inst_if (aka ReaderCount > 1) we must fall back to - * our generic IF. - * If the writer has a lower IP than inst_if, this - * means that inst_if is above the writer in a loop. - * I'm not sure why this would ever happen, but - * if it does we want to make sure we fall back - * to our generic IF. */ - if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { - generic_if = 1; - break; - } + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_instruction *inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. 
+ * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } - /* The ALU Result is not preserved across IF - * instructions, so if there is another IF - * instruction between writer and inst_if, then - * we need to fall back to generic IF. */ - for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - if (info->IsFlowControl) { - generic_if = 1; - break; - } - } - if (generic_if) { - break; - } - } - } + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } - if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { - alu_chan = RC_ALURESULT_X; - } else { - alu_chan = RC_ALURESULT_W; - } - if (generic_if) { - struct rc_instruction * inst_mov = - rc_insert_new_instruction(c, inst_if->Prev); + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst_if->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.WriteMask = 0; - inst_mov->U.I.DstReg.File = RC_FILE_NONE; - inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; - inst_mov->U.I.WriteALUResult = alu_chan; - inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; - if (alu_chan == RC_ALURESULT_X) { - 
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( - inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); - } else { - inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( - inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); - } - } else { - rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; - unsigned int preserve_opcode = 0; - for (list_ptr = writer_list; list_ptr; - list_ptr = list_ptr->Next) { - writer = list_ptr->Item; - switch(writer->Inst->U.I.Opcode) { - case RC_OPCODE_SEQ: - compare_func = RC_COMPARE_FUNC_EQUAL; - break; - case RC_OPCODE_SNE: - compare_func = RC_COMPARE_FUNC_NOTEQUAL; - break; - case RC_OPCODE_SGE: - compare_func = RC_COMPARE_FUNC_GEQUAL; - break; - case RC_OPCODE_SLT: - compare_func = RC_COMPARE_FUNC_LESS; - break; - default: - compare_func = RC_COMPARE_FUNC_NOTEQUAL; - preserve_opcode = 1; - break; - } - if (!preserve_opcode) { - writer->Inst->U.I.Opcode = RC_OPCODE_ADD; - writer->Inst->U.I.SrcReg[1].Negate = - ~writer->Inst->U.I.SrcReg[1].Negate; - } - writer->Inst->U.I.DstReg.WriteMask = 0; - writer->Inst->U.I.DstReg.File = RC_FILE_NONE; - writer->Inst->U.I.WriteALUResult = alu_chan; - writer->Inst->U.I.ALUResultCompare = compare_func; - } - } + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = + combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = + combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func 
compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch (writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_ADD; + writer->Inst->U.I.SrcReg[1].Negate = ~writer->Inst->U.I.SrcReg[1].Negate; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + } + } - inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; - inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; - inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( - RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); - inst_if->U.I.SrcReg[0].Negate = 0; + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; } -void r500_transform_IF( - struct radeon_compiler * c, - void *user) +void +r500_transform_IF(struct radeon_compiler *c, void *user) { - struct rc_list * var_list = rc_get_variables(c); + struct rc_list *var_list = rc_get_variables(c); - struct rc_instruction * inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * current = inst; - inst = inst->Next; + struct rc_instruction *inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct 
rc_instruction *current = inst; + inst = inst->Next; - if (current->U.I.Opcode == RC_OPCODE_IF) - r500_transform_IF_instr(c, current, var_list); - } + if (current->U.I.Opcode == RC_OPCODE_IF) + r500_transform_IF_instr(c, current, var_list); + } } -static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +static int +r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { - unsigned int relevant; - int i; + unsigned int relevant; + int i; - if (opcode == RC_OPCODE_TEX || - opcode == RC_OPCODE_TXB || - opcode == RC_OPCODE_TXP || - opcode == RC_OPCODE_TXD || - opcode == RC_OPCODE_TXL || - opcode == RC_OPCODE_KIL) { - if (reg.Abs) - return 0; + if (opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || opcode == RC_OPCODE_TXL || opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; - if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) - return 0; + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; - for(i = 0; i < 4; ++i) { - unsigned int swz = GET_SWZ(reg.Swizzle, i); - if (swz == RC_SWIZZLE_UNUSED) { - reg.Negate &= ~(1 << i); - continue; - } - if (swz >= 4) - return 0; - } + for (i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } - if (reg.Negate) - return 0; + if (reg.Negate) + return 0; - return 1; - } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { - /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; - * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) - return 1; + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... 
*/ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; - return 0; - } else { - /* ALU instructions support almost everything */ - relevant = 0; - for(i = 0; i < 3; ++i) { - unsigned int swz = GET_SWZ(reg.Swizzle, i); - if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) - relevant |= 1 << i; - } - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return 0; + return 0; + } else { + /* ALU instructions support almost everything */ + relevant = 0; + for (i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; - return 1; - } + return 1; + } } /** @@ -217,301 +203,273 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) * The only thing we *cannot* do in an ALU instruction is per-component * negation. */ -static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, - struct rc_swizzle_split * split) +static void +r500_swizzle_split(struct rc_src_register src, unsigned int usemask, struct rc_swizzle_split *split) { - unsigned int negatebase[2] = { 0, 0 }; - int i; + unsigned int negatebase[2] = {0, 0}; + int i; - for(i = 0; i < 4; ++i) { - unsigned int swz = GET_SWZ(src.Swizzle, i); - if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) - continue; - negatebase[GET_BIT(src.Negate, i)] |= 1 << i; - } + for (i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } - split->NumPhases = 0; + split->NumPhases = 0; - for(i = 0; i <= 1; ++i) { - if (!negatebase[i]) - continue; + for (i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; - split->Phase[split->NumPhases++] = negatebase[i]; - } + split->Phase[split->NumPhases++] = negatebase[i]; + } } -const struct 
rc_swizzle_caps r500_swizzle_caps = { - .IsNative = r500_swizzle_is_native, - .Split = r500_swizzle_split -}; +const struct rc_swizzle_caps r500_swizzle_caps = {.IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split}; -static char *toswiz(int swiz_val) { - switch(swiz_val) { - case 0: return "R"; - case 1: return "G"; - case 2: return "B"; - case 3: return "A"; - case 4: return "0"; - case 5: return "H"; - case 6: return "1"; - case 7: return "U"; - } - return NULL; +static char * +toswiz(int swiz_val) +{ + switch (swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; } -static char *toop(int op_val) +static char * +toop(int op_val) { - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP3"; break; - case 2: str = "DP4"; break; - case 3: str = "D2A"; break; - case 4: str = "MIN"; break; - case 5: str = "MAX"; break; - case 6: str = "Reserved"; break; - case 7: str = "CND"; break; - case 8: str = "CMP"; break; - case 9: str = "FRC"; break; - case 10: str = "SOP"; break; - case 11: str = "MDH"; break; - case 12: str = "MDV"; break; - } - return str; + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; } -static char *to_alpha_op(int op_val) +static char * +to_alpha_op(int op_val) { - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP"; break; - case 2: str = "MIN"; break; - case 3: str = "MAX"; break; - case 4: str = "Reserved"; break; - 
case 5: str = "CND"; break; - case 6: str = "CMP"; break; - case 7: str = "FRC"; break; - case 8: str = "EX2"; break; - case 9: str = "LN2"; break; - case 10: str = "RCP"; break; - case 11: str = "RSQ"; break; - case 12: str = "SIN"; break; - case 13: str = "COS"; break; - case 14: str = "MDH"; break; - case 15: str = "MDV"; break; - } - return str; + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; } -static char *to_mask(int val) +static char * +to_mask(int val) { - char *str = NULL; - switch(val) { - case 0: str = "NONE"; break; - case 1: str = "R"; break; - case 2: str = "G"; break; - case 3: str = "RG"; break; - case 4: str = "B"; break; - case 5: str = "RB"; break; - case 6: str = "GB"; break; - case 7: str = "RGB"; break; - case 8: str = "A"; break; - case 9: str = "AR"; break; - case 10: str = "AG"; break; - case 11: str = "ARG"; break; - case 12: str = "AB"; break; - case 13: str = "ARB"; break; - case 14: str = "AGB"; break; - case 15: str = "ARGB"; break; - } - return str; + char *str = NULL; + switch (val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; 
break; + } + return str; } -static char *to_texop(int val) +static char * +to_texop(int val) { - switch(val) { - case 0: return "NOP"; - case 1: return "LD"; - case 2: return "TEXKILL"; - case 3: return "PROJ"; - case 4: return "LODBIAS"; - case 5: return "LOD"; - case 6: return "DXDY"; - } - return NULL; + switch (val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; } -void r500FragmentProgramDump(struct radeon_compiler *c, void *user) +void +r500FragmentProgramDump(struct radeon_compiler *c, void *user) { - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r500_fragment_program_code *code = &compiler->code->code.r500; - int n, i; - uint32_t inst; - uint32_t inst0; - char *str = NULL; - fprintf(stderr, "R500 Fragment Program:\n--------\n"); + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + int n, i; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); - for (n = 0; n < code->inst_end+1; n++) { - inst0 = inst = code->inst[n].inst0; - fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); - switch(inst & 0x3) { - case R500_INST_TYPE_ALU: str = "ALU"; break; - case R500_INST_TYPE_OUT: str = "OUT"; break; - case R500_INST_TYPE_FC: str = "FC"; break; - case R500_INST_TYPE_TEX: str = "TEX"; break; - } - fprintf(stderr,"%s %s %s %s %s ", str, - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? 
"ALU WAIT" : ""); - fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), - to_mask((inst >> 15) & 0xf)); - - switch(inst0 & 0x3) { - case R500_INST_TYPE_ALU: - case R500_INST_TYPE_OUT: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); - inst = code->inst[n].inst1; - - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); - inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), - (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3, (inst >> 29) & 0x3); - - - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); - inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, - (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, - (inst >> 29) & 0x3, - (inst >> 31) & 0x1); - - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); - inst = code->inst[n].inst5; - fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), - (inst >> 23) & 0x3, - (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); - break; - case R500_INST_TYPE_FC: - fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - /* JUMP_FUNC JUMP_ANY*/ - fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, - (inst & R500_FC_JUMP_ANY) >> 5); - - /* OP */ - switch(inst & 0x7){ - case R500_FC_OP_JUMP: - fprintf(stderr, "JUMP"); - break; - case R500_FC_OP_LOOP: - fprintf(stderr, "LOOP"); - break; - case R500_FC_OP_ENDLOOP: - fprintf(stderr, "ENDLOOP"); - break; - case R500_FC_OP_REP: - fprintf(stderr, "REP"); - break; - case R500_FC_OP_ENDREP: - fprintf(stderr, "ENDREP"); - break; - case R500_FC_OP_BREAKLOOP: - fprintf(stderr, "BREAKLOOP"); - break; - case R500_FC_OP_BREAKREP: - fprintf(stderr, "BREAKREP"); - break; - case R500_FC_OP_CONTINUE: - fprintf(stderr, "CONTINUE"); - break; + for (n = 0; n < code->inst_end + 1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr, "%d\t0:CMN_INST 0x%08x:", n, inst); + switch (inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; } - fprintf(stderr," "); - /* A_OP */ - switch(inst & (0x3 << 6)){ - case R500_FC_A_OP_NONE: - fprintf(stderr, "NONE"); - break; - case R500_FC_A_OP_POP: - fprintf(stderr, "POP"); - break; - case R500_FC_A_OP_PUSH: - fprintf(stderr, "PUSH"); - break; - } - /* B_OP0 B_OP1 */ - for(i=0; i<2; i++){ - fprintf(stderr, " "); - switch(inst & (0x3 << (24 + (i * 2)))){ - /* R500_FC_B_OP0_NONE - * R500_FC_B_OP1_NONE */ - case 0: - fprintf(stderr, "NONE"); - break; - case R500_FC_B_OP0_DECR: - case R500_FC_B_OP1_DECR: - fprintf(stderr, "DECR"); - break; - case R500_FC_B_OP0_INCR: - case R500_FC_B_OP1_INCR: - fprintf(stderr, "INCR"); - break; - } - } - 
/*POP_CNT B_ELSE */ - fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); - inst = code->inst[n].inst3; - /* JUMP_ADDR */ - fprintf(stderr, " %d", inst >> 16); - - if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ - fprintf(stderr, " IGN_UNC"); - } - inst = code->inst[n].inst3; - fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); - fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", - inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); - break; - case R500_INST_TYPE_TEX: - inst = code->inst[n].inst1; - fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, - to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", - (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = code->inst[n].inst2; - fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, - inst & 127, inst & (1<<7) ? "(rel)" : "", - toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), - toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), - (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", - toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), - toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + fprintf(stderr, "%s %s %s %s %s ", str, inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr, "wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); - break; - } - fprintf(stderr,"\n"); - } + switch (inst0 & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + fprintf(stderr, "\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + fprintf(stderr, "Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, + (inst & (1 << 8)) ? 
'c' : 't', (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', (inst >> 30)); + + fprintf(stderr, "\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr, "Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, + (inst & (1 << 8)) ? 'c' : 't', (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', (inst >> 30)); + fprintf(stderr, "\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr, "rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), + toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, (inst >> 13) & 0x3, + toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + fprintf(stderr, "\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr, "%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", + to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1 << 11) ? "(rel)" : "", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, (inst >> 31) & 0x1); + + fprintf(stderr, "\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr, "%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", + toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1 << 11) ? 
"(rel)" : "", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), + toswiz((inst >> 20) & 0x7), (inst >> 23) & 0x3, (inst >> 25) & 0x3, + toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch (inst & 0x7) { + case R500_FC_OP_JUMP: fprintf(stderr, "JUMP"); break; + case R500_FC_OP_LOOP: fprintf(stderr, "LOOP"); break; + case R500_FC_OP_ENDLOOP: fprintf(stderr, "ENDLOOP"); break; + case R500_FC_OP_REP: fprintf(stderr, "REP"); break; + case R500_FC_OP_ENDREP: fprintf(stderr, "ENDREP"); break; + case R500_FC_OP_BREAKLOOP: fprintf(stderr, "BREAKLOOP"); break; + case R500_FC_OP_BREAKREP: fprintf(stderr, "BREAKREP"); break; + case R500_FC_OP_CONTINUE: fprintf(stderr, "CONTINUE"); break; + } + fprintf(stderr, " "); + /* A_OP */ + switch (inst & (0x3 << 6)) { + case R500_FC_A_OP_NONE: fprintf(stderr, "NONE"); break; + case R500_FC_A_OP_POP: fprintf(stderr, "POP"); break; + case R500_FC_A_OP_PUSH: fprintf(stderr, "PUSH"); break; + } + /* B_OP0 B_OP1 */ + for (i = 0; i < 2; i++) { + fprintf(stderr, " "); + switch (inst & (0x3 << (24 + (i * 2)))) { + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if (code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED) { + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, 
"BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", inst & 0x1f, + (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); + break; + case R500_INST_TYPE_TEX: + inst = code->inst[n].inst1; + fprintf(stderr, "\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, + (inst >> 16) & 0xf, to_texop((inst >> 22) & 0x7), (inst & (1 << 25)) ? "ACQ" : "", + (inst & (1 << 26)) ? "IGNUNC" : "", (inst & (1 << 27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr, "\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", + inst, inst & 127, inst & (1 << 7) ? "(rel)" : "", toswiz((inst >> 8) & 0x3), + toswiz((inst >> 10) & 0x3), toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1 << 23) ? "(rel)" : "", toswiz((inst >> 24) & 0x3), + toswiz((inst >> 26) & 0x3), toswiz((inst >> 28) & 0x3), + toswiz((inst >> 30) & 0x3)); + + fprintf(stderr, "\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr, "\n"); + } } diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h index 1a8550d815c..ff5dd48e7a2 100644 --- a/src/gallium/drivers/r300/compiler/r500_fragprog.h +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h @@ -18,8 +18,6 @@ extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); extern const struct rc_swizzle_caps r500_swizzle_caps; -extern void r500_transform_IF( - struct radeon_compiler * c, - void* data); +extern void r500_transform_IF(struct radeon_compiler *c, void *data); #endif diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c index a8703838647..8aeefba5697 100644 --- a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -23,646 +23,619 @@ #include "util/compiler.h" -#define PROG_CODE \ - struct r500_fragment_program_code *code = &c->code->code.r500 - 
-#define error(fmt, args...) do { \ - rc_error(&c->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __func__, ##args); \ - } while(0) +#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500 +#define error(fmt, args...) \ + do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \ + } while (0) struct branch_info { - int If; - int Else; - int Endif; + int If; + int Else; + int Endif; }; struct r500_loop_info { - int BgnLoop; + int BgnLoop; - int BranchDepth; - int * Brks; - int BrkCount; - int BrkReserved; + int BranchDepth; + int *Brks; + int BrkCount; + int BrkReserved; - int * Conts; - int ContCount; - int ContReserved; + int *Conts; + int ContCount; + int ContReserved; }; struct emit_state { - struct radeon_compiler * C; - struct r500_fragment_program_code * Code; + struct radeon_compiler *C; + struct r500_fragment_program_code *Code; - struct branch_info * Branches; - unsigned int CurrentBranchDepth; - unsigned int BranchesReserved; + struct branch_info *Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; - struct r500_loop_info * Loops; - unsigned int CurrentLoopDepth; - unsigned int LoopsReserved; - - unsigned int MaxBranchDepth; + struct r500_loop_info *Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + unsigned int MaxBranchDepth; }; -static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +static unsigned int +translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { - switch(opcode) { - case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; - case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; - default: - error("translate_rgb_op: unknown opcode %s\n", 
rc_get_opcode_info(opcode)->Name); - FALLTHROUGH; - case RC_OPCODE_NOP: - FALLTHROUGH; - case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; - } + switch (opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + FALLTHROUGH; + case RC_OPCODE_NOP: FALLTHROUGH; + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } } -static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +static unsigned int +translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { - switch(opcode) { - case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; - case RC_OPCODE_CND: return R500_ALPHA_OP_CND; - case RC_OPCODE_COS: return R500_ALPHA_OP_COS; - case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; - case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; - case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; - case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; - case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; - case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; - case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; - default: - error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); - FALLTHROUGH; - case RC_OPCODE_NOP: - FALLTHROUGH; - case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; - case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; - case 
RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; - case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; - case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; - } + switch (opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + FALLTHROUGH; + case RC_OPCODE_NOP: FALLTHROUGH; + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } } -static unsigned int fix_hw_swizzle(unsigned int swz) +static unsigned int +fix_hw_swizzle(unsigned int swz) { - switch (swz) { - case RC_SWIZZLE_ZERO: - case RC_SWIZZLE_UNUSED: - swz = 4; - break; - case RC_SWIZZLE_HALF: - swz = 5; - break; - case RC_SWIZZLE_ONE: - swz = 6; - break; - } + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: swz = 4; break; + case RC_SWIZZLE_HALF: swz = 5; break; + case RC_SWIZZLE_ONE: swz = 6; break; + } - return swz; + return swz; } -static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +static unsigned int +translate_arg_rgb(struct rc_pair_instruction *inst, int arg) { - unsigned int t = inst->RGB.Arg[arg].Source; - int comp; - t |= inst->RGB.Arg[arg].Negate << 11; - t |= inst->RGB.Arg[arg].Abs << 12; + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= 
inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; - for(comp = 0; comp < 3; ++comp) - t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + for (comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3 * comp + 2); - return t; + return t; } -static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +static unsigned int +translate_arg_alpha(struct rc_pair_instruction *inst, int i) { - unsigned int t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; - t |= inst->Alpha.Arg[i].Negate << 5; - t |= inst->Alpha.Arg[i].Abs << 6; - return t; + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; } -static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +static uint32_t +translate_alu_result_op(struct r300_fragment_program_compiler *c, rc_compare_func func) { - switch(func) { - case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; - case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; - case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; - case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; - default: - rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func); - return 0; - } + switch (func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func); + return 0; + } } -static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +static void 
+use_temporary(struct r500_fragment_program_code *code, unsigned int index) { - if (index > code->max_temp_idx) - code->max_temp_idx = index; + if (index > code->max_temp_idx) + code->max_temp_idx = index; } -static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) +static unsigned int +use_source(struct r500_fragment_program_code *code, struct rc_pair_instruction_source src) { - /* From docs: - * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. - * MSB = 1 << 7 */ - if (!src.Used) - return 1 << 7; + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. + * MSB = 1 << 7 */ + if (!src.Used) + return 1 << 7; - if (src.File == RC_FILE_CONSTANT) { - return src.Index | R500_RGB_ADDR0_CONST; - } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { - use_temporary(code, src.Index); - return src.Index; - } else if (src.File == RC_FILE_INLINE) { - return src.Index | (1 << 7); - } + if (src.File == RC_FILE_CONSTANT) { + return src.Index | R500_RGB_ADDR0_CONST; + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index; + } else if (src.File == RC_FILE_INLINE) { + return src.Index | (1 << 7); + } - return 0; + return 0; } /** * NOP the specified instruction if it is not a texture lookup. */ -static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +static void +alu_nop(struct r300_fragment_program_compiler *c, int ip) { - PROG_CODE; + PROG_CODE; - if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { - code->inst[ip].inst0 |= R500_INST_NOP; - } + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } } /** * Emit a paired ALU instruction. 
*/ -static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +static void +emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { - int ip; - PROG_CODE; + int ip; + PROG_CODE; - if (code->inst_end >= c->Base.max_alu_insts-1) { - error("emit_alu: Too many instructions"); - return; - } + if (code->inst_end >= c->Base.max_alu_insts - 1) { + error("emit_alu: Too many instructions"); + return; + } - ip = ++code->inst_end; + ip = ++code->inst_end; - /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ - if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || - inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { - if (ip > 0) { - alu_nop(c, ip - 1); - } - } + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } - code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { - code->inst[ip].inst0 = R500_INST_TYPE_OUT; - if (inst->WriteALUResult) { - error("Cannot write output and ALU result at the same time"); - return; - } - } else { - code->inst[ip].inst0 = R500_INST_TYPE_ALU; - } - code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT); + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("Cannot write output and ALU result at the same time"); + return; + } + } else { 
+ code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT); - code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); - code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; - code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); - if (inst->Nop) { - code->inst[ip].inst0 |= R500_INST_NOP; - } - if (inst->Alpha.DepthWriteMask) { - code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = 1; - } + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } - code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); - code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); - if (inst->Alpha.WriteMask) - use_temporary(code, inst->Alpha.DestIndex); - if (inst->RGB.WriteMask) - use_temporary(code, inst->RGB.DestIndex); + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + if (inst->Alpha.WriteMask) + use_temporary(code, inst->Alpha.DestIndex); + if (inst->RGB.WriteMask) + use_temporary(code, inst->RGB.DestIndex); - if (inst->RGB.Saturate) - code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; - if (inst->Alpha.Saturate) - code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; - /* Set the presubtract operation. 
*/ - switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; - break; - case RC_PRESUB_SUB: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; - break; - case RC_PRESUB_ADD: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; - break; - case RC_PRESUB_INV: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; - break; - default: - break; - } - switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; - break; - case RC_PRESUB_SUB: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; - break; - case RC_PRESUB_ADD: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; - break; - case RC_PRESUB_INV: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; - break; - default: - break; - } + /* Set the presubtract operation. */ + switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } - /* Set the output modifier */ - code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT; - code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT; + /* Set the output modifier */ + 
code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT; + code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT; - code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); - code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); - code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); - code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; - code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; - code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; - code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << 
R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); - code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); - if (inst->WriteALUResult) { - code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; - if (inst->WriteALUResult == RC_ALURESULT_X) - code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; - else - code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; - code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); - } + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } } -static unsigned int translate_strq_swizzle(unsigned int swizzle) +static unsigned int +translate_strq_swizzle(unsigned int swizzle) { - unsigned int swiz = 0; - int i; - for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; - return swiz; + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i * 2; + return swiz; } /** * Emit a single TEX instruction */ -static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +static int +emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { - int ip; - PROG_CODE; + int ip; + PROG_CODE; - if (code->inst_end >= c->Base.max_alu_insts-1) { - error("emit_tex: Too many instructions"); - return 0; - } + if (code->inst_end >= c->Base.max_alu_insts - 1) { + error("emit_tex: Too many instructions"); + return 0; + } - ip = ++code->inst_end; + ip = ++code->inst_end; - code->inst[ip].inst0 = 
R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) - | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); - code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT); + code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) | + (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); + code->inst[ip].inst1 = + R500_TEX_ID(inst->TexSrcUnit) | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT); - if (inst->TexSrcTarget == RC_TEXTURE_RECT) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; - switch (inst->Opcode) { - case RC_OPCODE_KIL: - code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; - break; - case RC_OPCODE_TEX: - code->inst[ip].inst1 |= R500_TEX_INST_LD; - break; - case RC_OPCODE_TXB: - code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; - break; - case RC_OPCODE_TXP: - code->inst[ip].inst1 |= R500_TEX_INST_PROJ; - break; - case RC_OPCODE_TXD: - code->inst[ip].inst1 |= R500_TEX_INST_DXDY; - break; - case RC_OPCODE_TXL: - code->inst[ip].inst1 |= R500_TEX_INST_LOD; - break; - default: - error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); - } + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } - use_temporary(code, inst->SrcReg[0].Index); - if (inst->Opcode != RC_OPCODE_KIL) - use_temporary(code, inst->DstReg.Index); + use_temporary(code, 
inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) - | (GET_SWZ(inst->TexSwizzle, 0) << 24) - | (GET_SWZ(inst->TexSwizzle, 1) << 26) - | (GET_SWZ(inst->TexSwizzle, 2) << 28) - | (GET_SWZ(inst->TexSwizzle, 3) << 30) - ; + code->inst[ip].inst2 = + R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) | + (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) | + R500_TEX_DST_ADDR(inst->DstReg.Index) | (GET_SWZ(inst->TexSwizzle, 0) << 24) | + (GET_SWZ(inst->TexSwizzle, 1) << 26) | (GET_SWZ(inst->TexSwizzle, 2) << 28) | + (GET_SWZ(inst->TexSwizzle, 3) << 30); - if (inst->Opcode == RC_OPCODE_TXD) { - use_temporary(code, inst->SrcReg[1].Index); - use_temporary(code, inst->SrcReg[2].Index); + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); - /* DX and DY parameters are specified in a separate register. */ - code->inst[ip].inst3 = - R500_DX_ADDR(inst->SrcReg[1].Index) | - (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | - R500_DY_ADDR(inst->SrcReg[2].Index) | - (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); - } + /* DX and DY parameters are specified in a separate register. 
*/ + code->inst[ip].inst3 = R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } - return 1; + return 1; } -static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +static void +emit_flowcontrol(struct emit_state *s, struct rc_instruction *inst) { - unsigned int newip; + unsigned int newip; - if (s->Code->inst_end >= s->C->max_alu_insts-1) { - rc_error(s->C, "emit_tex: Too many instructions"); - return; - } + if (s->Code->inst_end >= s->C->max_alu_insts - 1) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } - newip = ++s->Code->inst_end; + newip = ++s->Code->inst_end; - /* Currently all loops use the same integer constant to initialize - * the loop variables. */ - if(!s->Code->int_constants[0]) { - s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); - s->Code->int_constant_count = 1; - } - s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; - s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); + /* Currently all loops use the same integer constant to initialize + * the loop variables. 
*/ + if (!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); - switch(inst->U.I.Opcode){ - struct branch_info * branch; - struct r500_loop_info * loop; - case RC_OPCODE_BGNLOOP: - memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, - s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + switch (inst->U.I.Opcode) { + struct branch_info *branch; + struct r500_loop_info *loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, s->Loops, s->CurrentLoopDepth, + s->LoopsReserved, 1); - loop = &s->Loops[s->CurrentLoopDepth++]; - memset(loop, 0, sizeof(struct r500_loop_info)); - loop->BranchDepth = s->CurrentBranchDepth; - loop->BgnLoop = newip; + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; - s->Code->inst[newip].inst2 = R500_FC_OP_LOOP - | R500_FC_JUMP_FUNC(0x00) - | R500_FC_IGNORE_UNCOVERED - ; - break; - case RC_OPCODE_BRK: - loop = &s->Loops[s->CurrentLoopDepth - 1]; - memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, - loop->BrkCount, loop->BrkReserved, 1); + s->Code->inst[newip].inst2 = + R500_FC_OP_LOOP | R500_FC_JUMP_FUNC(0x00) | R500_FC_IGNORE_UNCOVERED; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, loop->BrkCount, loop->BrkReserved, 1); - loop->Brks[loop->BrkCount++] = newip; - s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_B_OP1_DECR - | R500_FC_B_POP_CNT( - s->CurrentBranchDepth - loop->BranchDepth) - | R500_FC_IGNORE_UNCOVERED - ; - break; + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = + R500_FC_OP_BREAKLOOP 
| R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR | + R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED; + break; - case RC_OPCODE_CONT: - loop = &s->Loops[s->CurrentLoopDepth - 1]; - memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, - loop->ContCount, loop->ContReserved, 1); - loop->Conts[loop->ContCount++] = newip; - s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_B_OP1_DECR - | R500_FC_B_POP_CNT( - s->CurrentBranchDepth - loop->BranchDepth) - | R500_FC_IGNORE_UNCOVERED - ; - break; + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, loop->ContCount, loop->ContReserved, + 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = + R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR | + R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED; + break; - case RC_OPCODE_ENDLOOP: - { - loop = &s->Loops[s->CurrentLoopDepth - 1]; - /* Emit ENDLOOP */ - s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_JUMP_ANY - | R500_FC_IGNORE_UNCOVERED - ; - /* The constant integer at index 0 is used by all loops. */ - s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) - | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) - ; + case RC_OPCODE_ENDLOOP: { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = + R500_FC_OP_ENDLOOP | R500_FC_JUMP_FUNC(0xff) | R500_FC_JUMP_ANY | R500_FC_IGNORE_UNCOVERED; + /* The constant integer at index 0 is used by all loops. 
*/ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(loop->BgnLoop + 1); - /* Set jump address and int constant for BGNLOOP */ - s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) - | R500_FC_JUMP_ADDR(newip) - ; + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(newip); - /* Set jump address for the BRK instructions. */ - while(loop->BrkCount--) { - s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = - R500_FC_JUMP_ADDR(newip + 1); - } + /* Set jump address for the BRK instructions. */ + while (loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1); + } - /* Set jump address for CONT instructions. */ - while(loop->ContCount--) { - s->Code->inst[loop->Conts[loop->ContCount]].inst3 = - R500_FC_JUMP_ADDR(newip); - } - s->CurrentLoopDepth--; - break; - } - case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { - rc_error(s->C, "Branch depth exceeds hardware limit"); - return; - } - memory_pool_array_reserve(&s->C->Pool, struct branch_info, - s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + /* Set jump address for CONT instructions. 
*/ + while (loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if (s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->CurrentBranchDepth, + s->BranchesReserved, 1); - branch = &s->Branches[s->CurrentBranchDepth++]; - branch->If = newip; - branch->Else = -1; - branch->Endif = -1; + branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; - if (s->CurrentBranchDepth > s->MaxBranchDepth) - s->MaxBranchDepth = s->CurrentBranchDepth; + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; - /* actual instruction is filled in at ENDIF time */ - break; - - case RC_OPCODE_ELSE: - if (!s->CurrentBranchDepth) { - rc_error(s->C, "%s: got ELSE outside a branch", __func__); - return; - } + /* actual instruction is filled in at ENDIF time */ + break; - branch = &s->Branches[s->CurrentBranchDepth - 1]; - branch->Else = newip; + case RC_OPCODE_ELSE: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __func__); + return; + } - /* actual instruction is filled in at ENDIF time */ - break; + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; - case RC_OPCODE_ENDIF: - if (!s->CurrentBranchDepth) { - rc_error(s->C, "%s: got ELSE outside a branch", __func__); - return; - } + /* actual instruction is filled in at ENDIF time */ + break; - branch = &s->Branches[s->CurrentBranchDepth - 1]; - branch->Endif = newip; + case RC_OPCODE_ENDIF: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __func__); + return; + } - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set 
this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) - ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ - | R500_FC_B_OP0_INCR /* increment branch counter if stay */ - | R500_FC_IGNORE_UNCOVERED - ; + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; - if (branch->Else >= 0) { - /* increment branch counter also if jump */ - s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; - s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + s->Code->inst[branch->Endif].inst2 = + R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1); + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED; - s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_B_ELSE /* all active pixels want to jump */ - | R500_FC_B_OP0_NONE /* no counter op if stay */ - | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ - | R500_FC_B_POP_CNT(1) - ; - s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } else { - /* don't touch branch counter on jump */ - s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; - s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } + 
if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + s->Code->inst[branch->Else].inst2 = + R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1); + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } - s->CurrentBranchDepth--; - break; - default: - rc_error(s->C, "%s: unknown opcode %s\n", __func__, rc_get_opcode_info(inst->U.I.Opcode)->Name); - } + s->CurrentBranchDepth--; + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __func__, + rc_get_opcode_info(inst->U.I.Opcode)->Name); + } } -void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +void +r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) { - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct emit_state s; - struct r500_fragment_program_code *code = &compiler->code->code.r500; + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c; + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; - memset(&s, 0, sizeof(s)); - s.C = &compiler->Base; - s.Code = code; + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; - memset(code, 0, sizeof(*code)); - code->max_temp_idx = 1; - code->inst_end = -1; + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; - for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; - inst != 
&compiler->Base.Program.Instructions && !compiler->Base.Error; - inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->IsFlowControl) { - emit_flowcontrol(&s, inst); - } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { - continue; - } else { - emit_tex(compiler, &inst->U.I); - } - } else { - emit_paired(compiler, &inst->U.P); - } - } + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } - if (code->max_temp_idx >= compiler->Base.max_temp_regs) - rc_error(&compiler->Base, "Too many hardware temporaries used\n"); + if (code->max_temp_idx >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used\n"); - if (compiler->Base.Error) - return; + if (compiler->Base.Error) + return; - if (code->inst_end == -1 || - (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { - int ip; + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; - /* This may happen when dead-code elimination is disabled or - * when most of the fragment program logic is leading to a KIL */ - if (code->inst_end >= compiler->Base.max_alu_insts-1) { - rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); - return; - } + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 
compiler->Base.max_alu_insts - 1) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } - ip = ++code->inst_end; - code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; - } + ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } - /* Make sure TEX_SEM_WAIT is set on the last instruction */ - code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT; + /* Make sure TEX_SEM_WAIT is set on the last instruction */ + code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT; - /* Enable full flow control mode if we are using loops or have if - * statements nested at least four deep. */ - if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { - if (code->max_temp_idx < 1) - code->max_temp_idx = 1; + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; - code->us_fc_ctrl |= R500_FC_FULL_FC_EN; - } + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } } diff --git a/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c index 027a652f2c6..1d9b38bec7e 100644 --- a/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c +++ b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c @@ -4,8 +4,8 @@ */ #include -#include "r300_nir.h" #include "nir_builder.h" +#include "r300_nir.h" static int follow_modifiers(nir_instr *instr) @@ -22,14 +22,13 @@ follow_modifiers(nir_instr *instr) if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 || intrin->intrinsic == nir_intrinsic_load_constant || intrin->intrinsic == nir_intrinsic_load_input) { - nir_foreach_use(use, &intrin->def) { - if (nir_src_parent_instr(use)->type == nir_instr_type_phi) - return intrin->def.index; - } + nir_foreach_use (use, &intrin->def) { + if (nir_src_parent_instr(use)->type == nir_instr_type_phi) + 
return intrin->def.index; + } } - if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && - !nir_src_is_const(intrin->src[1])) - return intrin->def.index; + if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && !nir_src_is_const(intrin->src[1])) + return intrin->def.index; } /* Assume the worst when we see a phi. */ if (instr->type == nir_instr_type_phi) @@ -55,10 +54,9 @@ has_three_different_tmp_sources(nir_alu_instr *fcsel) if (index == -1) return false; else - src_def_index[i] = index; + src_def_index[i] = index; } - return src_def_index[0] != src_def_index[1] && - src_def_index[0] != src_def_index[2] && + return src_def_index[0] != src_def_index[1] && src_def_index[0] != src_def_index[2] && src_def_index[1] != src_def_index[2]; } @@ -96,20 +94,16 @@ r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data) * even for nir_op_fcsel_gt if the source is 0 or 1 anyway. */ nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr; - if (alu->op == nir_op_fcsel || - (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) { - lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), - nir_ssa_for_alu_src(b, alu, 1), + if (alu->op == nir_op_fcsel || (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) { + lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), nir_ssa_for_alu_src(b, alu, 0)); } else if (alu->op == nir_op_fcsel_ge) { nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0)); - lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), - nir_ssa_for_alu_src(b, alu, 1), sge); + lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), sge); } else { - nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), - nir_imm_float(b, 0.0)); - lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), - nir_ssa_for_alu_src(b, alu, 1), slt); + nir_def *slt = + nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), nir_imm_float(b, 0.0)); + lrp = nir_flrp(b, 
nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), slt); } nir_def_replace(&alu->def, lrp); @@ -121,6 +115,5 @@ r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data) bool r300_nir_lower_fcsel_r500(nir_shader *shader) { - return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr, - nir_metadata_control_flow, NULL); + return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr, nir_metadata_control_flow, NULL); } diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c index 8f45845634a..755b2b6d34b 100644 --- a/src/gallium/drivers/r300/compiler/radeon_code.c +++ b/src/gallium/drivers/r300/compiler/radeon_code.c @@ -5,193 +5,197 @@ #include "radeon_code.h" -#include #include +#include #include #include "radeon_program.h" -void rc_constants_init(struct rc_constant_list * c) +void +rc_constants_init(struct rc_constant_list *c) { - memset(c, 0, sizeof(*c)); + memset(c, 0, sizeof(*c)); } /** * Copy a constants structure, assuming that the destination structure * is not initialized. 
*/ -void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +void +rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src) { - dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); - memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); - dst->Count = src->Count; - dst->_Reserved = src->Count; + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; } -void rc_constants_destroy(struct rc_constant_list * c) +void +rc_constants_destroy(struct rc_constant_list *c) { - free(c->Constants); - memset(c, 0, sizeof(*c)); + free(c->Constants); + memset(c, 0, sizeof(*c)); } -unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +unsigned +rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant) { - unsigned index = c->Count; + unsigned index = c->Count; - if (c->Count >= c->_Reserved) { - struct rc_constant * newlist; + if (c->Count >= c->_Reserved) { + struct rc_constant *newlist; - c->_Reserved = c->_Reserved * 2; - if (!c->_Reserved) - c->_Reserved = 16; + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; - newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); - memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); - free(c->Constants); - c->Constants = newlist; - } + free(c->Constants); + c->Constants = newlist; + } - c->Constants[index] = *constant; - c->Count++; + c->Constants[index] = *constant; + c->Count++; - return index; + return index; } - /** * Add a state vector to the constant list, while trying to avoid duplicates. 
*/ -unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +unsigned +rc_constants_add_state(struct rc_constant_list *c, unsigned state0, unsigned state1) { - unsigned index; - struct rc_constant constant; + unsigned index; + struct rc_constant constant; - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_STATE) { - if (c->Constants[index].u.State[0] == state0 && - c->Constants[index].u.State[1] == state1) - return index; - } - } + for (index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && c->Constants[index].u.State[1] == state1) + return index; + } + } - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_STATE; - constant.UseMask = RC_MASK_XYZW; - constant.u.State[0] = state0; - constant.u.State[1] = state1; + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.UseMask = RC_MASK_XYZW; + constant.u.State[0] = state0; + constant.u.State[1] = state1; - return rc_constants_add(c, &constant); + return rc_constants_add(c, &constant); } - /** * Add an immediate vector to the constant list, while trying to avoid * duplicates. 
*/ -unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +unsigned +rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data) { - unsigned index; - struct rc_constant constant; + unsigned index; + struct rc_constant constant; - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { - if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) - return index; - } - } + for (index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float) * 4)) + return index; + } + } - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.UseMask = RC_MASK_XYZW; - memcpy(constant.u.Immediate, data, sizeof(float) * 4); + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.UseMask = RC_MASK_XYZW; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); - return rc_constants_add(c, &constant); + return rc_constants_add(c, &constant); } - /** * Add an immediate scalar to the constant list, while trying to avoid * duplicates. 
*/ -unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +unsigned +rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data, unsigned *swizzle) { - unsigned index, free_comp; - int free_index = -1; - struct rc_constant constant; + unsigned index, free_comp; + int free_index = -1; + struct rc_constant constant; - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { - unsigned comp; - for(comp = 0; comp < 4; ++comp) { - if (c->Constants[index].UseMask & 1 << comp) { - if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); - return index; - } - } else { - if (free_index == -1) { - free_index = index; - free_comp = comp; - } - } - } - } - } + for (index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for (comp = 0; comp < 4; ++comp) { + if (c->Constants[index].UseMask & 1 << comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } else { + if (free_index == -1) { + free_index = index; + free_comp = comp; + } + } + } + } + } - if (free_index >= 0) { - c->Constants[free_index].u.Immediate[free_comp] = data; - c->Constants[free_index].UseMask |= 1 << free_comp; - *swizzle = RC_MAKE_SWIZZLE_SMEAR(free_comp); - return free_index; - } + if (free_index >= 0) { + c->Constants[free_index].u.Immediate[free_comp] = data; + c->Constants[free_index].UseMask |= 1 << free_comp; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(free_comp); + return free_index; + } - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.UseMask = RC_MASK_X; - constant.u.Immediate[0] = data; - *swizzle = RC_SWIZZLE_XXXX; + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.UseMask = RC_MASK_X; + constant.u.Immediate[0] = data; + *swizzle = 
RC_SWIZZLE_XXXX; - return rc_constants_add(c, &constant); + return rc_constants_add(c, &constant); } -static char swizzle_char(unsigned swz) +static char +swizzle_char(unsigned swz) { - switch (swz) { - case RC_SWIZZLE_X: - return 'x'; - case RC_SWIZZLE_Y: - return 'y'; - case RC_SWIZZLE_Z: - return 'z'; - case RC_SWIZZLE_W: - return 'w'; - default: - return 'u'; - } + switch (swz) { + case RC_SWIZZLE_X: + return 'x'; + case RC_SWIZZLE_Y: + return 'y'; + case RC_SWIZZLE_Z: + return 'z'; + case RC_SWIZZLE_W: + return 'w'; + default: + return 'u'; + } } -void rc_constants_print(struct rc_constant_list *c, struct const_remap *r) +void +rc_constants_print(struct rc_constant_list *c, struct const_remap *r) { - for (unsigned i = 0; i < c->Count; i++) { - if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { - float *values = c->Constants[i].u.Immediate; - fprintf(stderr, "CONST[%u] = {", i); - for (unsigned chan = 0; chan < 4; chan++) { - if (c->Constants[i].UseMask & 1 << chan) - fprintf(stderr, "%11.6f ", values[chan]); - else - fprintf(stderr, " unused "); - } - fprintf(stderr, "}\n"); - } - if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) { - fprintf(stderr, "CONST[%u] = {", i); - for (unsigned chan = 0; chan < 4; chan++) { - fprintf(stderr, "CONST[%i].%c ", r[i].index[chan], - swizzle_char(r[i].swizzle[chan])); - } - fprintf(stderr, " }\n"); - } - } + for (unsigned i = 0; i < c->Count; i++) { + if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { + float *values = c->Constants[i].u.Immediate; + fprintf(stderr, "CONST[%u] = {", i); + for (unsigned chan = 0; chan < 4; chan++) { + if (c->Constants[i].UseMask & 1 << chan) + fprintf(stderr, "%11.6f ", values[chan]); + else + fprintf(stderr, " unused "); + } + fprintf(stderr, "}\n"); + } + if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) { + fprintf(stderr, "CONST[%u] = {", i); + for (unsigned chan = 0; chan < 4; chan++) { + fprintf(stderr, "CONST[%i].%c ", r[i].index[chan], swizzle_char(r[i].swizzle[chan])); + 
} + fprintf(stderr, " }\n"); + } + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h index e7bdd0242a7..3d9920b8da4 100644 --- a/src/gallium/drivers/r300/compiler/radeon_code.h +++ b/src/gallium/drivers/r300/compiler/radeon_code.h @@ -14,78 +14,79 @@ #define R300_PFS_NUM_TEMP_REGS 32 #define R300_PFS_NUM_CONST_REGS 32 -#define R400_PFS_MAX_ALU_INST 512 -#define R400_PFS_MAX_TEX_INST 512 +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 -#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 #define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 /* The r500 maximum depth is not just for loops, but any combination of loops * and subroutine jumps. */ #define R500_PVS_MAX_LOOP_DEPTH 8 -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER + 0) enum { - /** - * External constants are constants whose meaning is unknown to this - * compiler. For example, a Mesa gl_program's constants are turned - * into external constants. - */ - RC_CONSTANT_EXTERNAL = 0, + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, - RC_CONSTANT_IMMEDIATE, + RC_CONSTANT_IMMEDIATE, - /** - * Constant referring to state that is known by this compiler, - * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. - */ - RC_CONSTANT_STATE + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. 
+ */ + RC_CONSTANT_STATE }; enum { - RC_STATE_SHADOW_AMBIENT = 0, + RC_STATE_SHADOW_AMBIENT = 0, - RC_STATE_R300_WINDOW_DIMENSION, - RC_STATE_R300_TEXRECT_FACTOR, - RC_STATE_R300_TEXSCALE_FACTOR, - RC_STATE_R300_VIEWPORT_SCALE, - RC_STATE_R300_VIEWPORT_OFFSET + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_TEXSCALE_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET }; struct rc_constant { - unsigned Type:2; /**< RC_CONSTANT_xxx */ - unsigned UseMask:4; + unsigned Type : 2; /**< RC_CONSTANT_xxx */ + unsigned UseMask : 4; - union { - unsigned External; - float Immediate[4]; - unsigned State[2]; - } u; + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; }; struct rc_constant_list { - struct rc_constant * Constants; - unsigned Count; + struct rc_constant *Constants; + unsigned Count; - unsigned _Reserved; + unsigned _Reserved; }; struct const_remap { - int index[4]; - uint8_t swizzle[4]; + int index[4]; + uint8_t swizzle[4]; }; -void rc_constants_init(struct rc_constant_list * c); -void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); -void rc_constants_destroy(struct rc_constant_list * c); -unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); -unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); -unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); -unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +void rc_constants_init(struct rc_constant_list *c); +void rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src); +void rc_constants_destroy(struct rc_constant_list *c); +unsigned rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant); +unsigned rc_constants_add_state(struct rc_constant_list *c, unsigned state1, unsigned state2); +unsigned 
rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data, + unsigned *swizzle); void rc_constants_print(struct rc_constant_list *c, struct const_remap *r); /** @@ -95,14 +96,14 @@ void rc_constants_print(struct rc_constant_list *c, struct const_remap *r); * the correct GL compare function. */ typedef enum { - RC_COMPARE_FUNC_NEVER = 0, - RC_COMPARE_FUNC_LESS, - RC_COMPARE_FUNC_EQUAL, - RC_COMPARE_FUNC_LEQUAL, - RC_COMPARE_FUNC_GREATER, - RC_COMPARE_FUNC_NOTEQUAL, - RC_COMPARE_FUNC_GEQUAL, - RC_COMPARE_FUNC_ALWAYS + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS } rc_compare_func; /** @@ -111,173 +112,168 @@ typedef enum { * These are not quite the same as their GL counterparts yet. */ typedef enum { - RC_WRAP_NONE = 0, - RC_WRAP_REPEAT, - RC_WRAP_MIRRORED_REPEAT, - RC_WRAP_MIRRORED_CLAMP + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP } rc_wrap_mode; /** * Stores state that influences the compilation of a fragment program. */ struct r300_fragment_program_external_state { - struct { - /** - * This field contains swizzle for some lowering passes - * (shadow comparison, unorm->snorm conversion) - */ - unsigned texture_swizzle:12; + struct { + /** + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) + */ + unsigned texture_swizzle : 12; - /** - * If the sampler is used as a shadow sampler, - * this field specifies the compare function. - * - * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). - * \sa rc_compare_func - */ - unsigned texture_compare_func : 3; + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. 
+ * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; - /** - * No matter what the sampler type is, - * this field turns it into a shadow sampler. - */ - unsigned compare_mode_enabled : 1; + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; - /** - * This field specifies wrapping modes for the sampler. - * - * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths - * will be performed on the coordinates. - */ - unsigned wrap_mode : 3; + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 3; - /** - * The coords are scaled after applying the wrap mode emulation - * and right before texture fetch. The scaling factor is given by - * RC_STATE_R300_TEXSCALE_FACTOR. */ - unsigned clamp_and_scale_before_fetch : 1; - } unit[16]; + /** + * The coords are scaled after applying the wrap mode emulation + * and right before texture fetch. The scaling factor is given by + * RC_STATE_R300_TEXSCALE_FACTOR. */ + unsigned clamp_and_scale_before_fetch : 1; + } unit[16]; - unsigned alpha_to_one:1; + unsigned alpha_to_one : 1; }; - - struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; }; /** * Stores an R300 fragment program in its compiled-to-hardware form. 
*/ struct r300_fragment_program_code { - struct { - unsigned int length; /**< total # of texture instructions used */ - uint32_t inst[R400_PFS_MAX_TEX_INST]; - } tex; + struct { + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; + } tex; - struct { - unsigned int length; /**< total # of ALU instructions used */ - struct { - uint32_t rgb_inst; - uint32_t rgb_addr; - uint32_t alpha_inst; - uint32_t alpha_addr; - uint32_t r400_ext_addr; - } inst[R400_PFS_MAX_ALU_INST]; - } alu; + struct { + unsigned int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; + } alu; - uint32_t config; /* US_CONFIG */ - uint32_t pixsize; /* US_PIXSIZE */ - uint32_t code_offset; /* US_CODE_OFFSET */ - uint32_t r400_code_offset_ext; /* US_CODE_EXT */ - uint32_t code_addr[4]; /* US_CODE_ADDR */ - /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries - * for r400 cards */ - unsigned int r390_mode:1; + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode : 1; }; - struct r500_fragment_program_code { - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - uint32_t inst4; - uint32_t inst5; - } inst[R500_PFS_MAX_INST]; + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; - int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ - int max_temp_idx; + int 
max_temp_idx; - uint32_t us_fc_ctrl; + uint32_t us_fc_ctrl; - uint32_t int_constants[32]; - uint32_t int_constant_count; + uint32_t int_constants[32]; + uint32_t int_constant_count; }; struct rX00_fragment_program_code { - union { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; - unsigned writes_depth:1; + unsigned writes_depth : 1; - struct rc_constant_list constants; - struct const_remap *constants_remap_table; + struct rc_constant_list constants; + struct const_remap *constants_remap_table; }; - -#define R300_VS_MAX_ALU 256 -#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) -#define R500_VS_MAX_ALU 1024 -#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) -#define R300_VS_MAX_TEMPS 32 +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 /* This is the max for all chipsets (r300-r500) */ -#define R300_VS_MAX_FC_OPS 16 +#define R300_VS_MAX_FC_OPS 16 #define R300_VS_MAX_LOOP_DEPTH 1 -#define VSF_MAX_INPUTS 32 +#define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 struct r300_vertex_program_code { - int length; - union { - uint32_t d[R500_VS_MAX_ALU_DWORDS]; - float f[R500_VS_MAX_ALU_DWORDS]; - } body; + int length; + union { + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; + } body; - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VSF_MAX_INPUTS]; - int outputs[VSF_MAX_OUTPUTS]; - unsigned last_input_read; - unsigned last_pos_write; + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + unsigned last_input_read; + unsigned last_pos_write; - struct rc_constant_list constants; - struct const_remap *constants_remap_table; 
+ struct rc_constant_list constants; + struct const_remap *constants_remap_table; - uint32_t InputsRead; - uint32_t OutputsWritten; + uint32_t InputsRead; + uint32_t OutputsWritten; - unsigned int num_fc_ops; - uint32_t fc_ops; - union { - uint32_t r300[R300_VS_MAX_FC_OPS]; - struct { - uint32_t lw; - uint32_t uw; - } r500[R300_VS_MAX_FC_OPS]; - } fc_op_addrs; - int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; }; #endif /* RADEON_CODE_H */ - diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c index 757b7cdb659..4631d15c84a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -10,104 +10,108 @@ #include #include -#include "util/u_debug.h" #include "pipe/p_state.h" +#include "util/u_debug.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" #include "radeon_regalloc.h" -#include "radeon_compiler_util.h" - -void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs) +void +rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs) { - memset(c, 0, sizeof(*c)); + memset(c, 0, sizeof(*c)); - memory_pool_init(&c->Pool); - c->Program.Instructions.Prev = &c->Program.Instructions; - c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; - c->regalloc_state = rs; - c->max_temp_index = -1; + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + c->regalloc_state = rs; + c->max_temp_index = -1; } 
-void rc_destroy(struct radeon_compiler * c) +void +rc_destroy(struct radeon_compiler *c) { - rc_constants_destroy(&c->Program.Constants); - memory_pool_destroy(&c->Pool); - free(c->ErrorMsg); + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); } -void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +void +rc_debug(struct radeon_compiler *c, const char *fmt, ...) { - va_list ap; + va_list ap; - if (!(c->Debug & RC_DBG_LOG)) - return; + if (!(c->Debug & RC_DBG_LOG)) + return; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); } -void rc_error(struct radeon_compiler * c, const char * fmt, ...) +void +rc_error(struct radeon_compiler *c, const char *fmt, ...) { - va_list ap; + va_list ap; - c->Error = 1; + c->Error = 1; - if (!c->ErrorMsg) { - /* Only remember the first error */ - char buf[1024]; - int written; + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; - va_start(ap, fmt); - written = vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); - if (written < sizeof(buf)) { - c->ErrorMsg = strdup(buf); - } else { - c->ErrorMsg = malloc(written + 1); + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); - va_start(ap, fmt); - vsnprintf(c->ErrorMsg, written + 1, fmt, ap); - va_end(ap); - } - } + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "r300compiler error: "); + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "r300compiler error: "); - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - } + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } } -int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * 
assertion) +int +rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion) { - rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); - return 1; + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; } -void rc_mark_unused_channels(struct radeon_compiler * c, void *user) +void +rc_mark_unused_channels(struct radeon_compiler *c, void *user) { - unsigned int srcmasks[3]; + unsigned int srcmasks[3]; - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { - rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks); + rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks); - for(unsigned int src = 0; src < 3; ++src) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(srcmasks[src], chan)) - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); - } - } - } + for (unsigned int src = 0; src < 3; ++src) { + for (unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } } /** @@ -115,191 +119,192 @@ void rc_mark_unused_channels(struct radeon_compiler * c, void *user) * based on which inputs and outputs are actually referenced * in program instructions. 
*/ -void rc_calculate_inputs_outputs(struct radeon_compiler * c) +void +rc_calculate_inputs_outputs(struct radeon_compiler *c) { - struct rc_instruction *inst; + struct rc_instruction *inst; - c->Program.InputsRead = 0; - c->Program.OutputsWritten = 0; + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) - { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - int i; + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) - c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index; - } + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index; + } - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) - c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index; - } - } + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index; + } + } } /** * Rewrite the program such that a given output is duplicated. 
*/ -void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +void +rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output) { - unsigned tempreg = rc_find_free_temporary(c); - struct rc_instruction * inst; - struct rc_instruction * insert_pos = c->Program.Instructions.Prev; - struct rc_instruction * last_write_inst = NULL; - unsigned branch_depth = 0; - unsigned loop_depth = 0; - bool emit_after_control_flow = false; - unsigned num_writes = 0; + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction *inst; + struct rc_instruction *insert_pos = c->Program.Instructions.Prev; + struct rc_instruction *last_write_inst = NULL; + unsigned branch_depth = 0; + unsigned loop_depth = 0; + bool emit_after_control_flow = false; + unsigned num_writes = 0; - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) - loop_depth++; - if (inst->U.I.Opcode == RC_OPCODE_IF) - branch_depth++; - if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) || - (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--)) - if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) { - insert_pos = inst; - emit_after_control_flow = false; - } + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + loop_depth++; + if (inst->U.I.Opcode == RC_OPCODE_IF) + branch_depth++; + if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) || + (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--)) + if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) { + insert_pos = inst; + emit_after_control_flow = false; + } - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT 
&& inst->U.I.DstReg.Index == output) { - num_writes++; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = tempreg; - insert_pos = inst; - last_write_inst = inst; - if (loop_depth != 0 && branch_depth != 0) - emit_after_control_flow = true; - } - } - } + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + num_writes++; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + insert_pos = inst; + last_write_inst = inst; + if (loop_depth != 0 && branch_depth != 0) + emit_after_control_flow = true; + } + } + } - /* If there is only a single write, just duplicate the whole instruction instead. - * We can do this even when the single write was is a control flow. - */ - if (num_writes == 1) { - last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT; - last_write_inst->U.I.DstReg.Index = output; + /* If there is only a single write, just duplicate the whole instruction instead. + * We can do this even when the single write was is a control flow. 
+ */ + if (num_writes == 1) { + last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT; + last_write_inst->U.I.DstReg.Index = output; - inst = rc_insert_new_instruction(c, last_write_inst); - struct rc_instruction * prev = inst->Prev; - struct rc_instruction * next = inst->Next; - memcpy(inst, last_write_inst, sizeof(struct rc_instruction)); - inst->Prev = prev; - inst->Next = next; - inst->U.I.DstReg.Index = dup_output; - } else { - inst = rc_insert_new_instruction(c, insert_pos); - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = output; + inst = rc_insert_new_instruction(c, last_write_inst); + struct rc_instruction *prev = inst->Prev; + struct rc_instruction *next = inst->Next; + memcpy(inst, last_write_inst, sizeof(struct rc_instruction)); + inst->Prev = prev; + inst->Next = next; + inst->U.I.DstReg.Index = dup_output; + } else { + inst = rc_insert_new_instruction(c, insert_pos); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = tempreg; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst = rc_insert_new_instruction(c, inst); - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = dup_output; + inst = rc_insert_new_instruction(c, inst); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = tempreg; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - } + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + } - c->Program.OutputsWritten |= 1U << dup_output; + 
c->Program.OutputsWritten |= 1U << dup_output; } - /** * Introduce standard code fragment to deal with fragment.position. */ -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, - int full_vtransform) +void +rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input, + int full_vtransform) { - unsigned tempregi = rc_find_free_temporary(c); - struct rc_instruction * inst_rcp; - struct rc_instruction * inst_mul; - struct rc_instruction * inst_mad; - struct rc_instruction * inst; + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_rcp; + struct rc_instruction *inst_mul; + struct rc_instruction *inst_mad; + struct rc_instruction *inst; - c->Program.InputsRead &= ~(1U << wpos); - c->Program.InputsRead |= 1U << new_input; + c->Program.InputsRead &= ~(1U << wpos); + c->Program.InputsRead |= 1U << new_input; - /* perspective divide */ - inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tempregi; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; - inst_rcp->U.I.SrcReg[0].Index = new_input; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - 
inst_mul->U.I.DstReg.Index = tempregi; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; - inst_mul->U.I.SrcReg[0].Index = new_input; + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = tempregi; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - /* viewport transformation */ - inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->U.I.Opcode = RC_OPCODE_MAD; + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = tempregi; - inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[0].Index = tempregi; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; - inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; - if (full_vtransform) { - 
inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); - inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); - } else { - inst_mad->U.I.SrcReg[1].Index = - inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - } + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = + rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = + rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index = + rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } - for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; - for(i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && - inst->U.I.SrcReg[i].Index == wpos) { - inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[i].Index = tempregi; - } - } - } + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } } - /** * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. * Gallium and OpenGL define it the other way around. 
@@ -307,203 +312,200 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig * So let's just negate FACE at the beginning of the shader and rewrite the rest * of the shader to read from the newly allocated temporary. */ -void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +void +rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) { - unsigned tempregi = rc_find_free_temporary(c); - struct rc_instruction *inst_add; - struct rc_instruction *inst; + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; - /* perspective divide */ - inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_add->U.I.Opcode = RC_OPCODE_ADD; + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = tempregi; - inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; - inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; - inst_add->U.I.SrcReg[1].Index = face; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; - inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; + for (inst = inst_add->Next; inst != &c->Program.Instructions; 
inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; - for(i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && - inst->U.I.SrcReg[i].Index == face) { - inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[i].Index = tempregi; - } - } - } + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } } -static void reg_count_callback(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void +reg_count_callback(void *userdata, struct rc_instruction *inst, rc_register_file file, + unsigned int index, unsigned int mask) { - struct rc_program_stats *s = userdata; - if (file == RC_FILE_TEMPORARY) - (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; - if (file == RC_FILE_INLINE) - s->num_inline_literals++; - if (file == RC_FILE_CONSTANT) - s->num_consts = MAX2(s->num_consts, index + 1); + struct rc_program_stats *s = userdata; + if (file == RC_FILE_TEMPORARY) + (int)index > s->num_temp_regs ? 
s->num_temp_regs = index : 0; + if (file == RC_FILE_INLINE) + s->num_inline_literals++; + if (file == RC_FILE_CONSTANT) + s->num_consts = MAX2(s->num_consts, index + 1); } -void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) +void +rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { - struct rc_instruction * tmp; - memset(s, 0, sizeof(*s)); - unsigned ip = 0; - int last_begintex = -1; + struct rc_instruction *tmp; + memset(s, 0, sizeof(*s)); + unsigned ip = 0; + int last_begintex = -1; - for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; - tmp = tmp->Next, ip++){ - const struct rc_opcode_info * info; - rc_for_all_reads_mask(tmp, reg_count_callback, s); - if (tmp->Type == RC_INSTRUCTION_NORMAL) { - info = rc_get_opcode_info(tmp->U.I.Opcode); - if (info->Opcode == RC_OPCODE_BEGIN_TEX) { - /* The R5xx docs mention ~30 cycles in section 8.3.1 - * The only case when we don't want to add the cycles - * penalty is when the texblock contains only kil. - */ - const struct rc_opcode_info *next_op - = rc_get_opcode_info(tmp->Next->U.I.Opcode); - struct rc_instruction *second_next_instr = tmp->Next->Next; - const struct rc_opcode_info *second_next_op; - if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) { - second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode); - } else { - second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode); - } - if (next_op->Opcode != RC_OPCODE_KIL || - (second_next_instr->Type == RC_INSTRUCTION_NORMAL && - second_next_op->HasTexture)) { - s->num_cycles += 30; - last_begintex = ip; - } - continue; - } - if (info->Opcode == RC_OPCODE_MAD && - rc_inst_has_three_diff_temp_srcs(tmp)) - s->num_cycles++; - } else { - if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - s->num_presub_ops++; - if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - s->num_presub_ops++; - /* Assuming alpha will never be a flow control or - * a tex instruction. 
*/ - if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - s->num_alpha_insts++; - if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - s->num_rgb_insts++; - if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && - tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { - s->num_omod_ops++; - } - if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && - tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { - s->num_omod_ops++; - } - if (tmp->U.P.Nop) - s->num_cycles++; - /* SemWait has effect only on R500, the more instructions we can put - * between the tex block and the first texture semaphore, the better. - */ - if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) { - s->num_cycles -= MIN2(30, ip - last_begintex); - last_begintex = -1; - } - info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); - } - if (info->IsFlowControl) { - s->num_fc_insts++; - if (info->Opcode == RC_OPCODE_BGNLOOP) - s->num_loops++; - } - /* VS flow control was already translated to the predicate instructions */ - if (c->type == RC_VERTEX_PROGRAM) - if (strstr(info->Name, "PRED") != NULL) - s->num_pred_insts++; + for (tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next, ip++) { + const struct rc_opcode_info *info; + rc_for_all_reads_mask(tmp, reg_count_callback, s); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) { + /* The R5xx docs mention ~30 cycles in section 8.3.1 + * The only case when we don't want to add the cycles + * penalty is when the texblock contains only kil. 
+ */ + const struct rc_opcode_info *next_op = rc_get_opcode_info(tmp->Next->U.I.Opcode); + struct rc_instruction *second_next_instr = tmp->Next->Next; + const struct rc_opcode_info *second_next_op; + if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) { + second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode); + } else { + second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode); + } + if (next_op->Opcode != RC_OPCODE_KIL || + (second_next_instr->Type == RC_INSTRUCTION_NORMAL && second_next_op->HasTexture)) { + s->num_cycles += 30; + last_begintex = ip; + } + continue; + } + if (info->Opcode == RC_OPCODE_MAD && rc_inst_has_three_diff_temp_srcs(tmp)) + s->num_cycles++; + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + s->num_alpha_insts++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + s->num_rgb_insts++; + if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { + s->num_omod_ops++; + } + if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { + s->num_omod_ops++; + } + if (tmp->U.P.Nop) + s->num_cycles++; + /* SemWait has effect only on R500, the more instructions we can put + * between the tex block and the first texture semaphore, the better. 
+ */ + if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) { + s->num_cycles -= MIN2(30, ip - last_begintex); + last_begintex = -1; + } + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) { + s->num_fc_insts++; + if (info->Opcode == RC_OPCODE_BGNLOOP) + s->num_loops++; + } + /* VS flow control was already translated to the predicate instructions */ + if (c->type == RC_VERTEX_PROGRAM) + if (strstr(info->Name, "PRED") != NULL) + s->num_pred_insts++; - if (info->HasTexture) - s->num_tex_insts++; - s->num_insts++; - s->num_cycles++; - } - /* Increment here because the reg_count_callback store the max - * temporary reg index in s->nun_temp_regs. */ - s->num_temp_regs++; + if (info->HasTexture) + s->num_tex_insts++; + s->num_insts++; + s->num_cycles++; + } + /* Increment here because the reg_count_callback store the max + * temporary reg index in s->nun_temp_regs. */ + s->num_temp_regs++; } -static void print_stats(struct radeon_compiler * c) +static void +print_stats(struct radeon_compiler *c) { - struct rc_program_stats s; + struct rc_program_stats s; - rc_get_stats(c, &s); + rc_get_stats(c, &s); - /* Note that we print some dummy values for instruction categories that - * only the FS has, because shader-db's report.py wants all shaders to - * have the same set. - */ - util_debug_message(c->debug, SHADER_INFO, - "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, " - "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles", - c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", - s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts, - s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops, - s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals, - s.num_cycles); + /* Note that we print some dummy values for instruction categories that + * only the FS has, because shader-db's report.py wants all shaders to + * have the same set. 
+ */ + util_debug_message( + c->debug, SHADER_INFO, + "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, " + "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles", + c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_pred_insts, s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops, + s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals, s.num_cycles); } -static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { - "Vertex Program", - "Fragment Program" -}; +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {"Vertex Program", "Fragment Program"}; -bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +bool +rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) { - for (unsigned i = 0; list[i].name; i++) { - if (list[i].predicate) { - list[i].run(c, list[i].user); + for (unsigned i = 0; list[i].name; i++) { + if (list[i].predicate) { + list[i].run(c, list[i].user); - if (c->Error) - return false; + if (c->Error) + return false; - if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); - rc_print_program(&c->Program); - } - } - } - return true; + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); + rc_print_program(&c->Program); + } + } + } + return true; } /* Executes a list of compiler passes given in the parameter 'list'. 
*/ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +void +rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) { - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); - rc_print_program(&c->Program); - } + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } - if(rc_run_compiler_passes(c, list)) { - print_stats(c); - } + if (rc_run_compiler_passes(c, list)) { + print_stats(c); + } } -void rc_validate_final_shader(struct radeon_compiler *c, void *user) +void +rc_validate_final_shader(struct radeon_compiler *c, void *user) { - /* Check the number of constants. */ - if (c->Program.Constants.Count > c->max_constants) { - rc_error(c, "Too many constants. Max: %i, Got: %i\n", - c->max_constants, c->Program.Constants.Count); - } + /* Check the number of constants. */ + if (c->Program.Constants.Count > c->max_constants) { + rc_error(c, "Too many constants. 
Max: %i, Got: %i\n", c->max_constants, + c->Program.Constants.Count); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h index 4337e6b8c67..699500d1d35 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -12,59 +12,55 @@ #include "radeon_code.h" #include "radeon_program.h" -#define RC_DBG_LOG (1 << 0) +#define RC_DBG_LOG (1 << 0) struct rc_swizzle_caps; -enum rc_program_type { - RC_VERTEX_PROGRAM, - RC_FRAGMENT_PROGRAM, - RC_NUM_PROGRAM_TYPES -}; +enum rc_program_type { RC_VERTEX_PROGRAM, RC_FRAGMENT_PROGRAM, RC_NUM_PROGRAM_TYPES }; struct radeon_compiler { - struct memory_pool Pool; - struct rc_program Program; - const struct rc_regalloc_state *regalloc_state; - struct util_debug_callback *debug; - enum rc_program_type type; - unsigned Debug:2; - unsigned Error:1; - char * ErrorMsg; + struct memory_pool Pool; + struct rc_program Program; + const struct rc_regalloc_state *regalloc_state; + struct util_debug_callback *debug; + enum rc_program_type type; + unsigned Debug : 2; + unsigned Error : 1; + char *ErrorMsg; - /* Hardware specification. */ - unsigned is_r400:1; - unsigned is_r500:1; - unsigned has_half_swizzles:1; - unsigned has_presub:1; - unsigned has_omod:1; - unsigned disable_optimizations:1; - unsigned max_temp_regs; - unsigned max_constants; - int max_alu_insts; - unsigned max_tex_insts; + /* Hardware specification. */ + unsigned is_r400 : 1; + unsigned is_r500 : 1; + unsigned has_half_swizzles : 1; + unsigned has_presub : 1; + unsigned has_omod : 1; + unsigned disable_optimizations : 1; + unsigned max_temp_regs; + unsigned max_constants; + int max_alu_insts; + unsigned max_tex_insts; - int max_temp_index; + int max_temp_index; - /* Whether to remove unused constants and empty holes in constant space. 
*/ - unsigned remove_unused_constants:1; + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants : 1; - /** - * Variables used internally, not be touched by callers - * of the compiler - */ - /*@{*/ - const struct rc_swizzle_caps * SwizzleCaps; - /*@}*/ + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + const struct rc_swizzle_caps *SwizzleCaps; + /*@}*/ }; -void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs); -void rc_destroy(struct radeon_compiler * c); +void rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs); +void rc_destroy(struct radeon_compiler *c); -void rc_debug(struct radeon_compiler * c, const char * fmt, ...); -void rc_error(struct radeon_compiler * c, const char * fmt, ...); +void rc_debug(struct radeon_compiler *c, const char *fmt, ...); +void rc_error(struct radeon_compiler *c, const char *fmt, ...); -int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); +int rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion); /** * This macro acts like an if-statement that can be used to implement @@ -78,71 +74,68 @@ int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, c * if (rc_assert(c, condition-that-must-be-true)) * return; */ -#define rc_assert(c, cond) \ - (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) +#define rc_assert(c, cond) (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) -void rc_mark_unused_channels(struct radeon_compiler * c, void *user); -void rc_calculate_inputs_outputs(struct radeon_compiler * c); -void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, +void rc_mark_unused_channels(struct radeon_compiler *c, void *user); +void 
rc_calculate_inputs_outputs(struct radeon_compiler *c); +void rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input, int full_vtransform); void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); struct r300_fragment_program_compiler { - struct radeon_compiler Base; - struct rX00_fragment_program_code *code; - /* Optional transformations and features. */ - struct r300_fragment_program_external_state state; - /* Register corresponding to the depthbuffer. */ - unsigned OutputDepth; - /* Registers corresponding to the four colorbuffers. */ - unsigned OutputColor[4]; + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. 
*/ + unsigned OutputColor[4]; - void * UserData; - void (*AllocateHwInputs)( - struct r300_fragment_program_compiler * c, - void (*allocate)(void * data, unsigned input, unsigned hwreg), - void * mydata); + void *UserData; + void (*AllocateHwInputs)(struct r300_fragment_program_compiler *c, + void (*allocate)(void *data, unsigned input, unsigned hwreg), + void *mydata); }; -void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler *c); struct r300_vertex_program_compiler { - struct radeon_compiler Base; - struct r300_vertex_program_code *code; - uint32_t RequiredOutputs; - - void * UserData; - void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + void *UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler *c); }; -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c); void rc_vert_fc(struct radeon_compiler *compiler, void *user); void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); struct radeon_compiler_pass { - const char *name; /* Name of the pass. */ - int dump; /* Dump the program if Debug == 1? */ - int predicate; /* Run this pass? */ - void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ - void *user; /* Optional parameter which is passed to the run function. */ + const char *name; /* Name of the pass. */ + int dump; /* Dump the program if Debug == 1? */ + int predicate; /* Run this pass? */ + void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ + void *user; /* Optional parameter which is passed to the run function. 
*/ }; struct rc_program_stats { - unsigned num_cycles; - unsigned num_consts; - unsigned num_insts; - unsigned num_fc_insts; - unsigned num_tex_insts; - unsigned num_rgb_insts; - unsigned num_alpha_insts; - unsigned num_pred_insts; - unsigned num_presub_ops; - unsigned num_temp_regs; - unsigned num_omod_ops; - unsigned num_inline_literals; - unsigned num_loops; + unsigned num_cycles; + unsigned num_consts; + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_pred_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; + unsigned num_omod_ops; + unsigned num_inline_literals; + unsigned num_loops; }; void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c index 5038b679e88..a5e8aba991a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -5,31 +5,33 @@ #include "radeon_compiler_util.h" +#include "r300_fragprog_swizzle.h" #include "radeon_compiler.h" #include "radeon_dataflow.h" -#include "r300_fragprog_swizzle.h" #include "util/u_math.h" /** */ -unsigned int rc_swizzle_to_writemask(unsigned int swz) +unsigned int +rc_swizzle_to_writemask(unsigned int swz) { - unsigned int mask = 0; - unsigned int i; + unsigned int mask = 0; + unsigned int i; - for(i = 0; i < 4; i++) { - mask |= 1 << GET_SWZ(swz, i); - } - mask &= RC_MASK_XYZW; + for (i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; - return mask; + return mask; } -rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +rc_swizzle +get_swz(unsigned int swz, rc_swizzle idx) { - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); } /** @@ -41,81 +43,91 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) * 
@return An initialized swizzle that has all of the unused channels set to * RC_SWIZZLE_UNUSED. */ -unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +unsigned int +rc_init_swizzle(unsigned int initial_value, unsigned int channels) { - unsigned int i; - for (i = channels; i < 4; i++) { - SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); - } - return initial_value; + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; } -unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +unsigned int +combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, + rc_swizzle swz_w) { - unsigned int ret = 0; + unsigned int ret = 0; - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; - return ret; + return ret; } -unsigned int combine_swizzles(unsigned int src, unsigned int swz) +unsigned int +combine_swizzles(unsigned int src, unsigned int swz) { - unsigned int ret = 0; + unsigned int ret = 0; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - return ret; + return ret; } /** * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W */ -rc_swizzle rc_mask_to_swizzle(unsigned int mask) +rc_swizzle +rc_mask_to_swizzle(unsigned int mask) { - switch (mask) { - case RC_MASK_X: 
return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - } - return RC_SWIZZLE_UNUSED; + switch (mask) { + case RC_MASK_X: + return RC_SWIZZLE_X; + case RC_MASK_Y: + return RC_SWIZZLE_Y; + case RC_MASK_Z: + return RC_SWIZZLE_Z; + case RC_MASK_W: + return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; } /* Reorder mask bits according to swizzle. */ -unsigned swizzle_mask(unsigned swizzle, unsigned mask) +unsigned +swizzle_mask(unsigned swizzle, unsigned mask) { - unsigned ret = 0; - for (unsigned chan = 0; chan < 4; ++chan) { - unsigned swz = GET_SWZ(swizzle, chan); - if (swz < 4) - ret |= GET_BIT(mask, swz) << chan; - } - return ret; + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; } -static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +static unsigned int +srcs_need_rewrite(const struct rc_opcode_info *info) { - if (info->HasTexture) { - return 0; - } - switch (info->Opcode) { - case RC_OPCODE_DP2: - case RC_OPCODE_DP3: - case RC_OPCODE_DP4: - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - return 0; - default: - return 1; - } + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } } /** @@ -129,38 +141,35 @@ static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) * @param conversion_swizzle Describes the conversion to perform on the swizzle * @return A new swizzle */ -unsigned int rc_adjust_channels( - unsigned int old_swizzle, - unsigned int conversion_swizzle) +unsigned int +rc_adjust_channels(unsigned int old_swizzle, unsigned int conversion_swizzle) { - unsigned int i; - unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); - for (i = 0; i < 4; 
i++) { - unsigned int new_chan = get_swz(conversion_swizzle, i); - if (new_chan == RC_SWIZZLE_UNUSED) { - continue; - } - SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); - } - return new_swizzle; + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; } -static unsigned int rewrite_writemask( - unsigned int old_mask, - unsigned int conversion_swizzle) +static unsigned int +rewrite_writemask(unsigned int old_mask, unsigned int conversion_swizzle) { - unsigned int new_mask = 0; - unsigned int i; + unsigned int new_mask = 0; + unsigned int i; - for (i = 0; i < 4; i++) { - if (!GET_BIT(old_mask, i) - || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { - continue; - } - new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); - } + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } - return new_mask; + return new_mask; } /** @@ -170,79 +179,72 @@ static unsigned int rewrite_writemask( * new writemask. For a detailed description of how conversion swizzles * work see rc_rewrite_swizzle(). 
*/ -void rc_pair_rewrite_writemask( - struct rc_pair_sub_instruction * sub, - unsigned int conversion_swizzle) +void +rc_pair_rewrite_writemask(struct rc_pair_sub_instruction *sub, unsigned int conversion_swizzle) { - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - unsigned int i; + const struct rc_opcode_info *info = rc_get_opcode_info(sub->Opcode); + unsigned int i; - sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); - if (!srcs_need_rewrite(info)) { - return ; - } + if (!srcs_need_rewrite(info)) { + return; + } - for (i = 0; i < info->NumSrcRegs; i++) { - sub->Arg[i].Swizzle = - rc_adjust_channels(sub->Arg[i].Swizzle, - conversion_swizzle); - } + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = rc_adjust_channels(sub->Arg[i].Swizzle, conversion_swizzle); + } } -static void normal_rewrite_writemask_cb( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +normal_rewrite_writemask_cb(void *userdata, struct rc_instruction *inst, + struct rc_src_register *src) { - unsigned int * conversion_swizzle = (unsigned int *)userdata; - src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle); + unsigned int *conversion_swizzle = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle); - /* Per-channel negates are possible in vertex shaders, - * so we need to rewrite it properly as well. */ - unsigned int new_negate = 0; - for (unsigned int i = 0; i < 4; i++) { - unsigned int new_chan = get_swz(*conversion_swizzle, i); + /* Per-channel negates are possible in vertex shaders, + * so we need to rewrite it properly as well. 
*/ + unsigned int new_negate = 0; + for (unsigned int i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(*conversion_swizzle, i); - if (new_chan == RC_SWIZZLE_UNUSED) - continue; + if (new_chan == RC_SWIZZLE_UNUSED) + continue; - if ((1 << i) & src->Negate) - new_negate |= 1 << new_chan; - } - src->Negate = new_negate; + if ((1 << i) & src->Negate) + new_negate |= 1 << new_chan; + } + src->Negate = new_negate; } /** * This function is the same as rc_pair_rewrite_writemask() except it * operates on normal instructions. */ -void rc_normal_rewrite_writemask( - struct rc_instruction * inst, - unsigned int conversion_swizzle) +void +rc_normal_rewrite_writemask(struct rc_instruction *inst, unsigned int conversion_swizzle) { - struct rc_sub_instruction * sub = &inst->U.I; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - sub->DstReg.WriteMask = - rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + struct rc_sub_instruction *sub = &inst->U.I; + const struct rc_opcode_info *info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); - if (info->HasTexture) { - unsigned int i; - assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); - for (i = 0; i < 4; i++) { - unsigned int swz = GET_SWZ(conversion_swizzle, i); - if (swz > 3) - continue; - SET_SWZ(sub->TexSwizzle, swz, i); - } - } + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } - if (!srcs_need_rewrite(info)) { - return; - } + if (!srcs_need_rewrite(info)) { + return; + } - rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, - &conversion_swizzle); + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &conversion_swizzle); } /** @@ -256,321 +258,297 @@ void rc_normal_rewrite_writemask( * @param conversion_swizzle Describes the 
conversion to perform on the swizzle * @return A converted swizzle */ -unsigned int rc_rewrite_swizzle( - unsigned int swizzle, - unsigned int conversion_swizzle) +unsigned int +rc_rewrite_swizzle(unsigned int swizzle, unsigned int conversion_swizzle) { - unsigned int chan; - unsigned int out_swizzle = swizzle; + unsigned int chan; + unsigned int out_swizzle = swizzle; - for (chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(swizzle, chan); - unsigned int new_swz; - if (swz > 3) { - SET_SWZ(out_swizzle, chan, swz); - } else { - new_swz = GET_SWZ(conversion_swizzle, swz); - if (new_swz != RC_SWIZZLE_UNUSED) { - SET_SWZ(out_swizzle, chan, new_swz); - } else { - SET_SWZ(out_swizzle, chan, swz); - } - } - } - return out_swizzle; + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; } /** * Left multiplication of a register with a swizzle */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +struct rc_src_register +lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) { - struct rc_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for (i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i * 3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + 
tmp.Swizzle |= swz << (i * 3); + } + } + return tmp; } -void reset_srcreg(struct rc_src_register* reg) +void +reset_srcreg(struct rc_src_register *reg) { - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; } -unsigned int rc_src_reads_dst_mask( - rc_register_file src_file, - unsigned int src_idx, - unsigned int src_swz, - rc_register_file dst_file, - unsigned int dst_idx, - unsigned int dst_mask) +unsigned int +rc_src_reads_dst_mask(rc_register_file src_file, unsigned int src_idx, unsigned int src_swz, + rc_register_file dst_file, unsigned int dst_idx, unsigned int dst_mask) { - if (src_file != dst_file || src_idx != dst_idx) { - return RC_MASK_NONE; - } - return dst_mask & rc_swizzle_to_writemask(src_swz); + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); } /** * @return A bit mask specifying whether this swizzle will select from an RGB * source, an Alpha source, or both. 
*/ -unsigned int rc_source_type_swz(unsigned int swizzle) +unsigned int +rc_source_type_swz(unsigned int swizzle) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_SOURCE_NONE; + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; - for(chan = 0; chan < 4; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_SOURCE_RGB; - } - } - return ret; + for (chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; } -unsigned int rc_source_type_mask(unsigned int mask) +unsigned int +rc_source_type_mask(unsigned int mask) { - unsigned int ret = RC_SOURCE_NONE; + unsigned int ret = RC_SOURCE_NONE; - if (mask & RC_MASK_XYZ) - ret |= RC_SOURCE_RGB; + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; - if (mask & RC_MASK_W) - ret |= RC_SOURCE_ALPHA; + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; - return ret; + return ret; } struct src_select { - rc_register_file File; - int Index; - unsigned int SrcType; - unsigned int Swizzle; + rc_register_file File; + int Index; + unsigned int SrcType; + unsigned int Swizzle; }; struct can_use_presub_data { - struct src_select Selects[5]; - unsigned int SelectCount; - const struct rc_src_register * ReplaceReg; - unsigned int ReplaceRemoved; + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register *ReplaceReg; + unsigned int ReplaceRemoved; }; -static void can_use_presub_data_add_select( - struct can_use_presub_data * data, - rc_register_file file, - unsigned int index, - unsigned int swizzle) +static void +can_use_presub_data_add_select(struct can_use_presub_data *data, rc_register_file file, + unsigned int 
index, unsigned int swizzle) { - struct src_select * select; + struct src_select *select; - select = &data->Selects[data->SelectCount++]; - select->File = file; - select->Index = index; - select->SrcType = rc_source_type_swz(swizzle); - select->Swizzle = swizzle; + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = rc_source_type_swz(swizzle); + select->Swizzle = swizzle; } /** * This callback function counts the number of sources in inst that are * different from the sources in can_use_presub_data->RemoveSrcs. */ -static void can_use_presub_read_cb( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +can_use_presub_read_cb(void *userdata, struct rc_instruction *inst, struct rc_src_register *src) { - struct can_use_presub_data * d = userdata; + struct can_use_presub_data *d = userdata; - if (!d->ReplaceRemoved && src == d->ReplaceReg) { - d->ReplaceRemoved = 1; - return; - } + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; + return; + } - if (src->File == RC_FILE_NONE) - return; + if (src->File == RC_FILE_NONE) + return; - can_use_presub_data_add_select(d, src->File, src->Index, - src->Swizzle); + can_use_presub_data_add_select(d, src->File, src->Index, src->Swizzle); } -unsigned int rc_inst_can_use_presub( - struct radeon_compiler * c, - struct rc_instruction * inst, - rc_presubtract_op presub_op, - unsigned int presub_writemask, - const struct rc_src_register * replace_reg, - const struct rc_src_register * presub_src0, - const struct rc_src_register * presub_src1) +unsigned int +rc_inst_can_use_presub(struct radeon_compiler *c, struct rc_instruction *inst, + rc_presubtract_op presub_op, unsigned int presub_writemask, + const struct rc_src_register *replace_reg, + const struct rc_src_register *presub_src0, + const struct rc_src_register *presub_src1) { - struct can_use_presub_data d; - unsigned int num_presub_srcs; - unsigned int 
i; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - int rgb_count = 0, alpha_count = 0; - unsigned int src_type0, src_type1; + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; - if (presub_op == RC_PRESUB_NONE) { - return 1; - } + if (presub_op == RC_PRESUB_NONE) { + return 1; + } - if (info->HasTexture) { - return 0; - } + if (info->HasTexture) { + return 0; + } - struct rc_src_register test_reg = *replace_reg; - test_reg.File = RC_FILE_PRESUB; - if (!c->SwizzleCaps->IsNative(info->Opcode, test_reg)) { - return 0; - } + struct rc_src_register test_reg = *replace_reg; + test_reg.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(info->Opcode, test_reg)) { + return 0; + } - /* We can't allow constant swizzles from presubtract, because it is not possible - * to rewrite it to a native swizzle later. */ - if (!c->is_r500) { - for (i = 0; i < 4; i++) { - rc_swizzle swz = GET_SWZ(replace_reg->Swizzle, i); - if (swz > RC_SWIZZLE_W && swz < RC_SWIZZLE_UNUSED) - return 0; - } - } + /* We can't allow constant swizzles from presubtract, because it is not possible + * to rewrite it to a native swizzle later. */ + if (!c->is_r500) { + for (i = 0; i < 4; i++) { + rc_swizzle swz = GET_SWZ(replace_reg->Swizzle, i); + if (swz > RC_SWIZZLE_W && swz < RC_SWIZZLE_UNUSED) + return 0; + } + } - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. - * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - return 0; - } + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. 
+ * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } - memset(&d, 0, sizeof(d)); - d.ReplaceReg = replace_reg; + memset(&d, 0, sizeof(d)); + d.ReplaceReg = replace_reg; - rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); - num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - src_type0 = rc_source_type_swz(presub_src0->Swizzle); - can_use_presub_data_add_select(&d, - presub_src0->File, - presub_src0->Index, - presub_src0->Swizzle); + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, presub_src0->File, presub_src0->Index, presub_src0->Swizzle); - if (num_presub_srcs > 1) { - src_type1 = rc_source_type_swz(presub_src1->Swizzle); - can_use_presub_data_add_select(&d, - presub_src1->File, - presub_src1->Index, - presub_src1->Swizzle); + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, presub_src1->File, presub_src1->Index, + presub_src1->Swizzle); - /* Even if both of the presub sources read from the same - * register, we still need to use 2 different source selects - * for them, so we need to increment the count to compensate. - */ - if (presub_src0->File == presub_src1->File - && presub_src0->Index == presub_src1->Index) { - if (src_type0 & src_type1 & RC_SOURCE_RGB) { - rgb_count++; - } - if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { - alpha_count++; - } - } - } + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. 
+ */ + if (presub_src0->File == presub_src1->File && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } - /* Count the number of source selects for Alpha and RGB. If we - * encounter two of the same source selects then we can ignore the - * first one. */ - for (i = 0; i < d.SelectCount; i++) { - unsigned int j; - unsigned int src_type = d.Selects[i].SrcType; - for (j = i + 1; j < d.SelectCount; j++) { - /* Even if the sources are the same now, they will not be the - * same later, if we have to rewrite some non-native swizzle. */ - if(!c->is_r500 && ( - !r300_swizzle_is_native_basic(d.Selects[i].Swizzle) || - !r300_swizzle_is_native_basic(d.Selects[j].Swizzle))) - continue; - if (d.Selects[i].File == d.Selects[j].File - && d.Selects[i].Index == d.Selects[j].Index) { - src_type &= ~d.Selects[j].SrcType; - } - } - if (src_type & RC_SOURCE_RGB) { - rgb_count++; - } + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. */ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + /* Even if the sources are the same now, they will not be the + * same later, if we have to rewrite some non-native swizzle. 
*/ + if (!c->is_r500 && (!r300_swizzle_is_native_basic(d.Selects[i].Swizzle) || + !r300_swizzle_is_native_basic(d.Selects[j].Swizzle))) + continue; + if (d.Selects[i].File == d.Selects[j].File && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } - if (src_type & RC_SOURCE_ALPHA) { - alpha_count++; - } - } + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } - if (rgb_count > 3 || alpha_count > 3) { - return 0; - } + if (rgb_count > 3 || alpha_count > 3) { + return 0; + } - return 1; + return 1; } struct max_data { - unsigned int Max; - unsigned int HasFileType; - rc_register_file File; + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; }; -static void max_callback( - void * userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) +static void +max_callback(void *userdata, struct rc_instruction *inst, rc_register_file file, unsigned int index, + unsigned int mask) { - struct max_data * d = (struct max_data*)userdata; - if (file == d->File && (!d->HasFileType || index > d->Max)) { - d->Max = index; - d->HasFileType = 1; - } + struct max_data *d = (struct max_data *)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } } /** * @return The maximum index of the specified register file used by the * program. 
*/ -int rc_get_max_index( - struct radeon_compiler * c, - rc_register_file file) +int +rc_get_max_index(struct radeon_compiler *c, rc_register_file file) { - struct max_data data; - struct rc_instruction * inst; - data.Max = 0; - data.HasFileType = 0; - data.File = file; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - rc_for_all_reads_mask(inst, max_callback, &data); - rc_for_all_writes_mask(inst, max_callback, &data); - } - if (!data.HasFileType) { - return -1; - } else { - return data.Max; - } + struct max_data data; + struct rc_instruction *inst; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } } /** @@ -580,147 +558,137 @@ int rc_get_max_index( * @param source The index of the source to remove */ -void rc_pair_remove_src( - struct rc_instruction * inst, - unsigned int src_type, - unsigned int source) +void +rc_pair_remove_src(struct rc_instruction *inst, unsigned int src_type, unsigned int source) { - if (src_type & RC_SOURCE_RGB) { - memset(&inst->U.P.RGB.Src[source], 0, - sizeof(struct rc_pair_instruction_source)); - } + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, sizeof(struct rc_pair_instruction_source)); + } - if (src_type & RC_SOURCE_ALPHA) { - memset(&inst->U.P.Alpha.Src[source], 0, - sizeof(struct rc_pair_instruction_source)); - } + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, sizeof(struct rc_pair_instruction_source)); + } } /** * @return RC_OPCODE_NOOP if inst is not a flow control instruction. * @return The opcode of inst if it is a flow control instruction. 
*/ -rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +rc_opcode +rc_get_flow_control_inst(struct rc_instruction *inst) { - const struct rc_opcode_info * info; - if (inst->Type == RC_INSTRUCTION_NORMAL) { - info = rc_get_opcode_info(inst->U.I.Opcode); - } else { - info = rc_get_opcode_info(inst->U.P.RGB.Opcode); - /*A flow control instruction shouldn't have an alpha - * instruction.*/ - assert(!info->IsFlowControl || - inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); - } - - if (info->IsFlowControl) - return info->Opcode; - else - return RC_OPCODE_NOP; + const struct rc_opcode_info *info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; } /** * @return The BGNLOOP instruction that starts the loop ended by endloop. 
*/ -struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +struct rc_instruction * +rc_match_endloop(struct rc_instruction *endloop) { - unsigned int endloop_count = 0; - struct rc_instruction * inst; - for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_ENDLOOP) { - endloop_count++; - } else if (op == RC_OPCODE_BGNLOOP) { - if (endloop_count == 0) { - return inst; - } else { - endloop_count--; - } - } - } - return NULL; + unsigned int endloop_count = 0; + struct rc_instruction *inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; } /** * @return The ENDLOOP instruction that ends the loop started by bgnloop. */ -struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +struct rc_instruction * +rc_match_bgnloop(struct rc_instruction *bgnloop) { - unsigned int bgnloop_count = 0; - struct rc_instruction * inst; - for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_BGNLOOP) { - bgnloop_count++; - } else if (op == RC_OPCODE_ENDLOOP) { - if (bgnloop_count == 0) { - return inst; - } else { - bgnloop_count--; - } - } - } - return NULL; + unsigned int bgnloop_count = 0; + struct rc_instruction *inst; + for (inst = bgnloop->Next; inst != bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; } /** * @return A conversion swizzle for converting from old_mask->new_mask */ -unsigned int 
rc_make_conversion_swizzle( - unsigned int old_mask, - unsigned int new_mask) +unsigned int +rc_make_conversion_swizzle(unsigned int old_mask, unsigned int new_mask) { - unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); - unsigned int old_idx; - unsigned int new_idx = 0; - for (old_idx = 0; old_idx < 4; old_idx++) { - if (!GET_BIT(old_mask, old_idx)) - continue; - for ( ; new_idx < 4; new_idx++) { - if (GET_BIT(new_mask, new_idx)) { - SET_SWZ(conversion_swizzle, old_idx, new_idx); - new_idx++; - break; - } - } - } - return conversion_swizzle; + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for (; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; } /** * @return 1 if the register contains an immediate value, 0 otherwise. */ -unsigned int rc_src_reg_is_immediate( - struct radeon_compiler * c, - unsigned int file, - unsigned int index) +unsigned int +rc_src_reg_is_immediate(struct radeon_compiler *c, unsigned int file, unsigned int index) { - return file == RC_FILE_CONSTANT && - c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE; + return file == RC_FILE_CONSTANT && + c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE; } /** * @return The immediate value in the specified register. 
*/ -float rc_get_constant_value( - struct radeon_compiler * c, - unsigned int index, - unsigned int swizzle, - unsigned int negate, - unsigned int chan) +float +rc_get_constant_value(struct radeon_compiler *c, unsigned int index, unsigned int swizzle, + unsigned int negate, unsigned int chan) { - float base = 1.0f; - int swz = GET_SWZ(swizzle, chan); - if(swz >= 4 || index >= c->Program.Constants.Count ){ - rc_error(c, "get_constant_value: Can't find a value.\n"); - return 0.0f; - } - if(GET_BIT(negate, chan)){ - base = -1.0f; - } - return base * - c->Program.Constants.Constants[index].u.Immediate[swz]; + float base = 1.0f; + int swz = GET_SWZ(swizzle, chan); + if (swz >= 4 || index >= c->Program.Constants.Count) { + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if (GET_BIT(negate, chan)) { + base = -1.0f; + } + return base * c->Program.Constants.Constants[index].u.Immediate[swz]; } /** @@ -728,38 +696,41 @@ float rc_get_constant_value( * channel in the swizzle. This is only useful for scalar instructions that are * known to use only one channel of the swizzle. 
*/ -unsigned int rc_get_scalar_src_swz(unsigned int swizzle) +unsigned int +rc_get_scalar_src_swz(unsigned int swizzle) { - unsigned int swz, chan; - for (chan = 0; chan < 4; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz != RC_SWIZZLE_UNUSED) { - break; - } - } - assert(swz != RC_SWIZZLE_UNUSED); - return swz; + unsigned int swz, chan; + for (chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz != RC_SWIZZLE_UNUSED) { + break; + } + } + assert(swz != RC_SWIZZLE_UNUSED); + return swz; } -bool rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst) +bool +rc_inst_has_three_diff_temp_srcs(struct rc_instruction *inst) { - return (inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[2].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[1].Index && - inst->U.I.SrcReg[1].Index != inst->U.I.SrcReg[2].Index && - inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[2].Index); + return (inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[2].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[1].Index && + inst->U.I.SrcReg[1].Index != inst->U.I.SrcReg[2].Index && + inst->U.I.SrcReg[0].Index != inst->U.I.SrcReg[2].Index); } -float rc_inline_to_float(int index) +float +rc_inline_to_float(int index) { - int r300_exponent = (index >> 3) & 0xf; - unsigned r300_mantissa = index & 0x7; - unsigned float_exponent; - unsigned real_float; + int r300_exponent = (index >> 3) & 0xf; + unsigned r300_mantissa = index & 0x7; + unsigned float_exponent; + unsigned real_float; - r300_exponent -= 7; - float_exponent = r300_exponent + 127; - real_float = (r300_mantissa << 20) | (float_exponent << 23); - return uif(real_float); + r300_exponent -= 7; + float_exponent = r300_exponent + 127; + real_float = (r300_mantissa << 20) | (float_exponent << 23); + return uif(real_float); } diff --git 
a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h index 91c4aa27c2c..2b24d5e9180 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -24,9 +24,8 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); -unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, - rc_swizzle swz_z, rc_swizzle swz_w); +unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); unsigned int combine_swizzles(unsigned int src, unsigned int swz); @@ -34,75 +33,49 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask); unsigned swizzle_mask(unsigned swizzle, unsigned mask); -unsigned int rc_adjust_channels( - unsigned int old_swizzle, - unsigned int conversion_swizzle); +unsigned int rc_adjust_channels(unsigned int old_swizzle, unsigned int conversion_swizzle); -void rc_pair_rewrite_writemask( - struct rc_pair_sub_instruction * sub, - unsigned int conversion_swizzle); +void rc_pair_rewrite_writemask(struct rc_pair_sub_instruction *sub, + unsigned int conversion_swizzle); -void rc_normal_rewrite_writemask( - struct rc_instruction * inst, - unsigned int conversion_swizzle); +void rc_normal_rewrite_writemask(struct rc_instruction *inst, unsigned int conversion_swizzle); -unsigned int rc_rewrite_swizzle( - unsigned int swizzle, - unsigned int new_mask); +unsigned int rc_rewrite_swizzle(unsigned int swizzle, unsigned int new_mask); struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); -void reset_srcreg(struct rc_src_register* reg); +void reset_srcreg(struct rc_src_register *reg); -unsigned int rc_src_reads_dst_mask( - rc_register_file src_file, - unsigned int src_idx, - unsigned int src_swz, - rc_register_file dst_file, - unsigned int 
dst_idx, - unsigned int dst_mask); +unsigned int rc_src_reads_dst_mask(rc_register_file src_file, unsigned int src_idx, + unsigned int src_swz, rc_register_file dst_file, + unsigned int dst_idx, unsigned int dst_mask); unsigned int rc_source_type_swz(unsigned int swizzle); unsigned int rc_source_type_mask(unsigned int mask); -unsigned int rc_inst_can_use_presub( - struct radeon_compiler * c, - struct rc_instruction * inst, - rc_presubtract_op presub_op, - unsigned int presub_writemask, - const struct rc_src_register * replace_reg, - const struct rc_src_register * presub_src0, - const struct rc_src_register * presub_src1); +unsigned int rc_inst_can_use_presub(struct radeon_compiler *c, struct rc_instruction *inst, + rc_presubtract_op presub_op, unsigned int presub_writemask, + const struct rc_src_register *replace_reg, + const struct rc_src_register *presub_src0, + const struct rc_src_register *presub_src1); -int rc_get_max_index( - struct radeon_compiler * c, - rc_register_file file); +int rc_get_max_index(struct radeon_compiler *c, rc_register_file file); -void rc_pair_remove_src(struct rc_instruction * inst, - unsigned int src_type, - unsigned int source); +void rc_pair_remove_src(struct rc_instruction *inst, unsigned int src_type, unsigned int source); -rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); +rc_opcode rc_get_flow_control_inst(struct rc_instruction *inst); -struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); -struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); +struct rc_instruction *rc_match_endloop(struct rc_instruction *endloop); +struct rc_instruction *rc_match_bgnloop(struct rc_instruction *bgnloop); -unsigned int rc_make_conversion_swizzle( - unsigned int old_mask, - unsigned int new_mask); +unsigned int rc_make_conversion_swizzle(unsigned int old_mask, unsigned int new_mask); -unsigned int rc_src_reg_is_immediate( - struct radeon_compiler * c, - unsigned int file, - unsigned int 
index); +unsigned int rc_src_reg_is_immediate(struct radeon_compiler *c, unsigned int file, + unsigned int index); -float rc_get_constant_value( - struct radeon_compiler * c, - unsigned int index, - unsigned int swizzle, - unsigned int negate, - unsigned int chan); +float rc_get_constant_value(struct radeon_compiler *c, unsigned int index, unsigned int swizzle, + unsigned int negate, unsigned int chan); unsigned int rc_get_scalar_src_swz(unsigned int swizzle); diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c index 4a6931d4e5c..5cf4dcdb15b 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -11,199 +11,174 @@ #include "radeon_program.h" struct read_write_mask_data { - void * UserData; - rc_read_write_mask_fn Cb; + void *UserData; + rc_read_write_mask_fn Cb; }; -static void reads_normal_callback( - void * userdata, - struct rc_instruction * fullinst, - struct rc_src_register * src) +static void +reads_normal_callback(void *userdata, struct rc_instruction *fullinst, struct rc_src_register *src) { - struct read_write_mask_data * cb_data = userdata; - unsigned int refmask = 0; - unsigned int chan; - for(chan = 0; chan < 4; chan++) { - refmask |= 1 << GET_SWZ(src->Swizzle, chan); - } - refmask &= RC_MASK_XYZW; + struct read_write_mask_data *cb_data = userdata; + unsigned int refmask = 0; + unsigned int chan; + for (chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src->Swizzle, chan); + } + refmask &= RC_MASK_XYZW; - if (refmask) { - cb_data->Cb(cb_data->UserData, fullinst, src->File, - src->Index, refmask); - } + if (refmask) { + cb_data->Cb(cb_data->UserData, fullinst, src->File, src->Index, refmask); + } - if (refmask && src->RelAddr) { - cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, - RC_MASK_X); - } + if (refmask && src->RelAddr) { + cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, 
RC_MASK_X); + } } -static void pair_get_src_refmasks(unsigned int * refmasks, - struct rc_pair_instruction * inst, - unsigned int swz, unsigned int src) +static void +pair_get_src_refmasks(unsigned int *refmasks, struct rc_pair_instruction *inst, unsigned int swz, + unsigned int src) { - if (swz >= 4) - return; + if (swz >= 4) + return; - if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { - if(src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - int srcp_regs = - rc_presubtract_src_reg_count( - inst->RGB.Src[src].Index); - for(i = 0; i < srcp_regs; i++) { - refmasks[i] |= 1 << swz; - } - } - else { - refmasks[src] |= 1 << swz; - } - } + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count(inst->RGB.Src[src].Index); + for (i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } else { + refmasks[src] |= 1 << swz; + } + } - if (swz == RC_SWIZZLE_W) { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - int srcp_regs = rc_presubtract_src_reg_count( - inst->Alpha.Src[src].Index); - for(i = 0; i < srcp_regs; i++) { - refmasks[i] |= 1 << swz; - } - } - else { - refmasks[src] |= 1 << swz; - } - } + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count(inst->Alpha.Src[src].Index); + for (i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } else { + refmasks[src] |= 1 << swz; + } + } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +static void +reads_pair(struct rc_instruction *fullinst, rc_read_write_mask_fn cb, void *userdata) { - struct rc_pair_instruction * inst = &fullinst->U.P; - unsigned int refmasks[3] = { 0, 0, 0 }; + struct rc_pair_instruction *inst = &fullinst->U.P; + unsigned int refmasks[3] = {0, 0, 0}; - unsigned int arg; + unsigned int arg; - for(arg = 0; arg < 3; ++arg) { - 
unsigned int chan; - for(chan = 0; chan < 3; ++chan) { - unsigned int swz_rgb = - GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); - unsigned int swz_alpha = - GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); - pair_get_src_refmasks(refmasks, inst, swz_rgb, - inst->RGB.Arg[arg].Source); - pair_get_src_refmasks(refmasks, inst, swz_alpha, - inst->Alpha.Arg[arg].Source); - } - } + for (arg = 0; arg < 3; ++arg) { + unsigned int chan; + for (chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, inst->Alpha.Arg[arg].Source); + } + } - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, - refmasks[src] & RC_MASK_XYZ); + for (unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); - if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); - } + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } } -static void pair_sub_for_all_args( - struct rc_instruction * fullinst, - struct rc_pair_sub_instruction * sub, - rc_pair_read_arg_fn cb, - void * userdata) +static void +pair_sub_for_all_args(struct rc_instruction *fullinst, struct rc_pair_sub_instruction *sub, + rc_pair_read_arg_fn cb, void *userdata) { - int i; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + int i; + const struct rc_opcode_info *info = rc_get_opcode_info(sub->Opcode); - 
for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type; + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type; - src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); - if (src_type == RC_SOURCE_NONE) - continue; + if (src_type == RC_SOURCE_NONE) + continue; - if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { - unsigned int presub_type; - unsigned int presub_src_count; - struct rc_pair_instruction_source * src_array; - unsigned int j; + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source *src_array; + unsigned int j; - if (src_type & RC_SOURCE_RGB) { - presub_type = fullinst-> - U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; - src_array = fullinst->U.P.RGB.Src; - } else { - presub_type = fullinst-> - U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; - src_array = fullinst->U.P.Alpha.Src; - } - presub_src_count - = rc_presubtract_src_reg_count(presub_type); - for(j = 0; j < presub_src_count; j++) { - cb(userdata, fullinst, &sub->Arg[i], - &src_array[j]); - } - } else { - struct rc_pair_instruction_source * src = - rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); - if (src) { - cb(userdata, fullinst, &sub->Arg[i], src); - } - } - } + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count = rc_presubtract_src_reg_count(presub_type); + for (j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], &src_array[j]); + } + } else { + struct rc_pair_instruction_source *src = rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } + } } /* This function calls the callback function (cb) for each source used by * the instruction. 
* */ -void rc_for_all_reads_src( - struct rc_instruction * inst, - rc_read_src_fn cb, - void * userdata) +void +rc_for_all_reads_src(struct rc_instruction *inst, rc_read_src_fn cb, void *userdata) { - const struct rc_opcode_info * opcode = - rc_get_opcode_info(inst->U.I.Opcode); + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - /* This function only works with normal instructions. */ - if (inst->Type != RC_INSTRUCTION_NORMAL) { - assert(0); - return; - } + /* This function only works with normal instructions. */ + if (inst->Type != RC_INSTRUCTION_NORMAL) { + assert(0); + return; + } - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { - unsigned int i; - unsigned int srcp_regs = rc_presubtract_src_reg_count( - inst->U.I.PreSub.Opcode); - for( i = 0; i < srcp_regs; i++) { - cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); - } - } else { - cb(userdata, inst, &inst->U.I.SrcReg[src]); - } - } + if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count(inst->U.I.PreSub.Opcode); + for (i = 0; i < srcp_regs; i++) { + cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); + } + } else { + cb(userdata, inst, &inst->U.I.SrcReg[src]); + } + } } /** * This function calls the callback function (cb) for each arg of the RGB and * alpha components. */ -void rc_pair_for_all_reads_arg( - struct rc_instruction * inst, - rc_pair_read_arg_fn cb, - void * userdata) +void +rc_pair_for_all_reads_arg(struct rc_instruction *inst, rc_pair_read_arg_fn cb, void *userdata) { - /* This function only works with pair instructions. */ - if (inst->Type != RC_INSTRUCTION_PAIR) { - assert(0); - return; - } + /* This function only works with pair instructions. 
*/ + if (inst->Type != RC_INSTRUCTION_PAIR) { + assert(0); + return; + } - pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); - pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); } /** @@ -213,45 +188,46 @@ void rc_pair_for_all_reads_arg( * the callback may also be called multiple times. * Also, the writemask of the instruction is not taken into account. */ -void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +void +rc_for_all_reads_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - struct read_write_mask_data cb_data; - cb_data.UserData = userdata; - cb_data.Cb = cb; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct read_write_mask_data cb_data; + cb_data.UserData = userdata; + cb_data.Cb = cb; - rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); - } else { - reads_pair(inst, cb, userdata); - } + rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); + } else { + reads_pair(inst, cb, userdata); + } } - - -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +static void +writes_normal(struct rc_instruction *fullinst, rc_read_write_mask_fn cb, void *userdata) { - struct rc_sub_instruction * inst = &fullinst->U.I; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + struct rc_sub_instruction *inst = &fullinst->U.I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg && inst->DstReg.WriteMask) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); - if (inst->WriteALUResult) - cb(userdata, fullinst, 
RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +static void +writes_pair(struct rc_instruction *fullinst, rc_read_write_mask_fn cb, void *userdata) { - struct rc_pair_instruction * inst = &fullinst->U.P; + struct rc_pair_instruction *inst = &fullinst->U.P; - if (inst->RGB.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); - if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); - if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } /** @@ -260,29 +236,30 @@ static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn * * \warning Does not report output registers for paired instructions! 
*/ -void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +void +rc_for_all_writes_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - writes_normal(inst, cb, userdata); - } else { - writes_pair(inst, cb, userdata); - } + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } } - struct mask_to_chan_data { - void * UserData; - rc_read_write_chan_fn Fn; + void *UserData; + rc_read_write_chan_fn Fn; }; -static void mask_to_chan_cb(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void +mask_to_chan_cb(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index, + unsigned int mask) { - struct mask_to_chan_data * d = data; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(mask, chan)) - d->Fn(d->UserData, inst, file, index, chan); - } + struct mask_to_chan_data *d = data; + for (unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } } /** @@ -291,12 +268,13 @@ static void mask_to_chan_cb(void * data, struct rc_instruction * inst, * This is conservative, i.e. channels may be called multiple times, * and the writemask of the instruction is not taken into account. 
*/ -void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +void +rc_for_all_reads_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata) { - struct mask_to_chan_data d; - d.UserData = userdata; - d.Fn = cb; - rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); } /** @@ -304,276 +282,259 @@ void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn c * * \warning Does not report output registers for paired instructions! */ -void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +void +rc_for_all_writes_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata) { - struct mask_to_chan_data d; - d.UserData = userdata; - d.Fn = cb; - rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); } -static void remap_normal_instruction(struct rc_instruction * fullinst, - rc_remap_register_fn cb, void * userdata) +static void +remap_normal_instruction(struct rc_instruction *fullinst, rc_remap_register_fn cb, void *userdata) { - struct rc_sub_instruction * inst = &fullinst->U.I; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - unsigned int remapped_presub = 0; + struct rc_sub_instruction *inst = &fullinst->U.I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; - if (opcode->HasDstReg) { - rc_register_file file = inst->DstReg.File; - unsigned int index = inst->DstReg.Index; + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; - cb(userdata, fullinst, &file, &index); + cb(userdata, fullinst, &file, &index); - inst->DstReg.File = file; - 
inst->DstReg.Index = index; - } + inst->DstReg.File = file; + inst->DstReg.Index = index; + } - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - rc_register_file file = inst->SrcReg[src].File; - unsigned int index = inst->SrcReg[src].Index; + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; - if (file == RC_FILE_PRESUB) { - unsigned int i; - unsigned int srcp_srcs = rc_presubtract_src_reg_count( - inst->PreSub.Opcode); - /* Make sure we only remap presubtract sources once in - * case more than one source register reads the - * presubtract result. */ - if (remapped_presub) - continue; + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count(inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; - for(i = 0; i < srcp_srcs; i++) { - file = inst->PreSub.SrcReg[i].File; - index = inst->PreSub.SrcReg[i].Index; - cb(userdata, fullinst, &file, &index); - inst->PreSub.SrcReg[i].File = file; - inst->PreSub.SrcReg[i].Index = index; - } - remapped_presub = 1; - } - else { - cb(userdata, fullinst, &file, &index); + for (i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } + remapped_presub = 1; + } else { + cb(userdata, fullinst, &file, &index); - inst->SrcReg[src].File = file; - inst->SrcReg[src].Index = index; - } - } + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } + } } -static void remap_pair_instruction(struct rc_instruction * fullinst, - rc_remap_register_fn cb, void * userdata) +static void +remap_pair_instruction(struct rc_instruction *fullinst, rc_remap_register_fn cb, 
void *userdata) { - struct rc_pair_instruction * inst = &fullinst->U.P; + struct rc_pair_instruction *inst = &fullinst->U.P; - if (inst->RGB.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->RGB.DestIndex; + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; - cb(userdata, fullinst, &file, &index); + cb(userdata, fullinst, &file, &index); - inst->RGB.DestIndex = index; - } + inst->RGB.DestIndex = index; + } - if (inst->Alpha.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->Alpha.DestIndex; + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; - cb(userdata, fullinst, &file, &index); + cb(userdata, fullinst, &file, &index); - inst->Alpha.DestIndex = index; - } + inst->Alpha.DestIndex = index; + } - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - rc_register_file file = inst->RGB.Src[src].File; - unsigned int index = inst->RGB.Src[src].Index; + for (unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; - cb(userdata, fullinst, &file, &index); + cb(userdata, fullinst, &file, &index); - inst->RGB.Src[src].File = file; - inst->RGB.Src[src].Index = index; - } + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } - if (inst->Alpha.Src[src].Used) { - rc_register_file file = inst->Alpha.Src[src].File; - unsigned int index = inst->Alpha.Src[src].Index; + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; - cb(userdata, fullinst, &file, &index); + cb(userdata, fullinst, &file, &index); - inst->Alpha.Src[src].File = file; - inst->Alpha.Src[src].Index = index; - } - } + inst->Alpha.Src[src].File = file; + 
inst->Alpha.Src[src].Index = index; + } + } } - /** * Remap all register accesses according to the given function. * That is, call the function \p cb for each referenced register (both read and written) * and update the given instruction \p inst accordingly * if it modifies its \ref pfile and \ref pindex contents. */ -void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +void +rc_remap_registers(struct rc_instruction *inst, rc_remap_register_fn cb, void *userdata) { - if (inst->Type == RC_INSTRUCTION_NORMAL) - remap_normal_instruction(inst, cb, userdata); - else - remap_pair_instruction(inst, cb, userdata); + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); } struct branch_write_mask { - unsigned int IfWriteMask:4; - unsigned int ElseWriteMask:4; - unsigned int HasElse:1; + unsigned int IfWriteMask : 4; + unsigned int ElseWriteMask : 4; + unsigned int HasElse : 1; }; union get_readers_read_cb { - rc_read_src_fn I; - rc_pair_read_arg_fn P; + rc_read_src_fn I; + rc_pair_read_arg_fn P; }; struct get_readers_callback_data { - struct radeon_compiler * C; - struct rc_reader_data * ReaderData; - rc_read_src_fn ReadNormalCB; - rc_pair_read_arg_fn ReadPairCB; - rc_read_write_mask_fn WriteCB; - rc_register_file DstFile; - unsigned int DstIndex; - unsigned int DstMask; - unsigned int AliveWriteMask; - /* For convenience, this is indexed starting at 1 */ - struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; + struct radeon_compiler *C; + struct rc_reader_data *ReaderData; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; + rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; + unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; -static struct 
rc_reader * add_reader( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask) +static struct rc_reader * +add_reader(struct memory_pool *pool, struct rc_reader_data *data, struct rc_instruction *inst, + unsigned int mask) { - struct rc_reader * new; - memory_pool_array_reserve(pool, struct rc_reader, data->Readers, - data->ReaderCount, data->ReadersReserved, 1); - new = &data->Readers[data->ReaderCount++]; - new->Inst = inst; - new->WriteMask = mask; - return new; + struct rc_reader *new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, data->ReaderCount, + data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + return new; } -static void add_reader_normal( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask, - struct rc_src_register * src) +static void +add_reader_normal(struct memory_pool *pool, struct rc_reader_data *data, + struct rc_instruction *inst, unsigned int mask, struct rc_src_register *src) { - struct rc_reader * new = add_reader(pool, data, inst, mask); - new->U.I.Src = src; + struct rc_reader *new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; } - -static void add_reader_pair( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) +static void +add_reader_pair(struct memory_pool *pool, struct rc_reader_data *data, struct rc_instruction *inst, + unsigned int mask, struct rc_pair_instruction_arg *arg, + struct rc_pair_instruction_source *src) { - struct rc_reader * new = add_reader(pool, data, inst, mask); - new->U.P.Src = src; - new->U.P.Arg = arg; + struct rc_reader *new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; } -static unsigned int get_readers_read_callback( 
- struct get_readers_callback_data * cb_data, - rc_register_file file, - unsigned int index, - unsigned int swizzle) +static unsigned int +get_readers_read_callback(struct get_readers_callback_data *cb_data, rc_register_file file, + unsigned int index, unsigned int swizzle) { - unsigned int shared_mask, read_mask; + unsigned int shared_mask, read_mask; - shared_mask = rc_src_reads_dst_mask(file, index, swizzle, - cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, cb_data->DstFile, cb_data->DstIndex, + cb_data->AliveWriteMask); - if (shared_mask == RC_MASK_NONE) - return shared_mask; + if (shared_mask == RC_MASK_NONE) + return shared_mask; - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ - read_mask = rc_swizzle_to_writemask(swizzle); - if (cb_data->ReaderData->AbortOnRead & read_mask) { - cb_data->ReaderData->Abort = 1; - return shared_mask; - } + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } - if (cb_data->ReaderData->LoopDepth > 0) { - cb_data->ReaderData->AbortOnWrite |= - (read_mask & cb_data->AliveWriteMask); - } + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= (read_mask & cb_data->AliveWriteMask); + } - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & cb_data->AliveWriteMask) != read_mask) { - cb_data->ReaderData->Abort = 1; - return shared_mask; - } + /* XXX The behavior in this case should be configurable. 
*/ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } - return shared_mask; + return shared_mask; } -static void get_readers_pair_read_callback( - void * userdata, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) +static void +get_readers_pair_read_callback(void *userdata, struct rc_instruction *inst, + struct rc_pair_instruction_arg *arg, + struct rc_pair_instruction_source *src) { - unsigned int shared_mask; - struct get_readers_callback_data * d = userdata; + unsigned int shared_mask; + struct get_readers_callback_data *d = userdata; - shared_mask = get_readers_read_callback(d, - src->File, src->Index, arg->Swizzle); + shared_mask = get_readers_read_callback(d, src->File, src->Index, arg->Swizzle); - if (shared_mask == RC_MASK_NONE) - return; + if (shared_mask == RC_MASK_NONE) + return; - if (d->ReadPairCB) - d->ReadPairCB(d->ReaderData, inst, arg, src); + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; - add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); } /** * This function is used by rc_get_readers_normal() to determine whether inst * is a reader of userdata->ReaderData->Writer */ -static void get_readers_normal_read_callback( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +get_readers_normal_read_callback(void *userdata, struct rc_instruction *inst, + struct rc_src_register *src) { - struct get_readers_callback_data * d = userdata; - unsigned int shared_mask; + struct get_readers_callback_data *d = userdata; + unsigned int shared_mask; - shared_mask = get_readers_read_callback(d, - src->File, src->Index, src->Swizzle); + 
shared_mask = get_readers_read_callback(d, src->File, src->Index, src->Swizzle); - if (shared_mask == RC_MASK_NONE) - return; - /* The callback function could potentially clear d->ReaderData->Abort, - * so we need to call it before we return. */ - if (d->ReadNormalCB) - d->ReadNormalCB(d->ReaderData, inst, src); + if (shared_mask == RC_MASK_NONE) + return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; - add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); } /** @@ -581,256 +542,231 @@ static void get_readers_normal_read_callback( * userdata->ReaderData->Writer is dead (i. e. All components of its * destination register have been overwritten by other instructions). 
*/ -static void get_readers_write_callback( - void *userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) +static void +get_readers_write_callback(void *userdata, struct rc_instruction *inst, rc_register_file file, + unsigned int index, unsigned int mask) { - struct get_readers_callback_data * d = userdata; + struct get_readers_callback_data *d = userdata; - if (index == d->DstIndex && file == d->DstFile) { - unsigned int shared_mask = mask & d->DstMask; - d->ReaderData->AbortOnRead &= ~shared_mask; - d->AliveWriteMask &= ~shared_mask; - if (d->ReaderData->AbortOnWrite & shared_mask) { - d->ReaderData->Abort = 1; - } - } + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } + } - if(d->WriteCB) - d->WriteCB(d->ReaderData, inst, file, index, mask); + if (d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); } -static void push_branch_mask( - struct get_readers_callback_data * d, - unsigned int * branch_depth) +static void +push_branch_mask(struct get_readers_callback_data *d, unsigned int *branch_depth) { - (*branch_depth)++; - if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { - d->ReaderData->Abort = 1; - return; - } - d->BranchMasks[*branch_depth].IfWriteMask = - d->AliveWriteMask; + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[*branch_depth].IfWriteMask = d->AliveWriteMask; } -static void pop_branch_mask( - struct get_readers_callback_data * d, - unsigned int * branch_depth) +static void +pop_branch_mask(struct get_readers_callback_data *d, unsigned int *branch_depth) { - struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + struct branch_write_mask *masks = 
&d->BranchMasks[*branch_depth]; - if (masks->HasElse) { - /* Abort on read for components that were written in the IF - * block. */ - d->ReaderData->AbortOnRead |= - masks->IfWriteMask & ~masks->ElseWriteMask; - /* Abort on read for components that were written in the ELSE - * block. */ - d->ReaderData->AbortOnRead |= - masks->ElseWriteMask & ~d->AliveWriteMask; + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + d->ReaderData->AbortOnRead |= masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= masks->ElseWriteMask & ~d->AliveWriteMask; - d->AliveWriteMask = masks->IfWriteMask - ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) - & (masks->IfWriteMask ^ d->AliveWriteMask)); - } else { - d->ReaderData->AbortOnRead |= - masks->IfWriteMask & ~d->AliveWriteMask; - d->AliveWriteMask = masks->IfWriteMask; - - } - memset(masks, 0, sizeof(struct branch_write_mask)); - (*branch_depth)--; + d->AliveWriteMask = masks->IfWriteMask ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) & + (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; } -static void get_readers_for_single_write( - void * userdata, - struct rc_instruction * writer, - rc_register_file dst_file, - unsigned int dst_index, - unsigned int dst_mask) +static void +get_readers_for_single_write(void *userdata, struct rc_instruction *writer, + rc_register_file dst_file, unsigned int dst_index, + unsigned int dst_mask) { - struct rc_instruction * tmp; - unsigned int branch_depth = 0; - struct rc_instruction * endloop = NULL; - unsigned int abort_on_read_at_endloop = 0; - unsigned int abort_on_read_at_break = 0; - unsigned int alive_write_mask_at_breaks = 0; - struct 
get_readers_callback_data * d = userdata; + struct rc_instruction *tmp; + unsigned int branch_depth = 0; + struct rc_instruction *endloop = NULL; + unsigned int abort_on_read_at_endloop = 0; + unsigned int abort_on_read_at_break = 0; + unsigned int alive_write_mask_at_breaks = 0; + struct get_readers_callback_data *d = userdata; - d->ReaderData->Writer = writer; - d->ReaderData->AbortOnRead = 0; - d->ReaderData->AbortOnWrite = 0; - d->ReaderData->LoopDepth = 0; - d->ReaderData->InElse = 0; - d->DstFile = dst_file; - d->DstIndex = dst_index; - d->DstMask = dst_mask; - d->AliveWriteMask = dst_mask; - memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); - if (!dst_mask) - return; + if (!dst_mask) + return; - for(tmp = writer->Next; tmp != &d->C->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = rc_get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - d->ReaderData->LoopDepth++; - push_branch_mask(d, &branch_depth); - break; - case RC_OPCODE_ENDLOOP: - if (d->ReaderData->LoopDepth > 0) { - d->ReaderData->LoopDepth--; - if (d->ReaderData->LoopDepth == 0) { - d->ReaderData->AbortOnWrite = 0; - } - pop_branch_mask(d, &branch_depth); - } else { - /* Here we have reached an ENDLOOP without - * seeing its BGNLOOP. These means that - * the writer was written inside of a loop, - * so it could have readers that are above it - * (i.e. they have a lower IP). To find these - * readers we jump to the BGNLOOP instruction - * and check each instruction until we get - * back to the writer. 
- */ - endloop = tmp; - tmp = rc_match_endloop(tmp); - if (!tmp) { - rc_error(d->C, "Failed to match endloop.\n"); - d->ReaderData->Abort = 1; - return; - } - abort_on_read_at_endloop = d->ReaderData->AbortOnRead; - d->ReaderData->AbortOnRead |= d->AliveWriteMask; - continue; - } - break; - case RC_OPCODE_BRK: - if (branch_depth == 0 && d->ReaderData->LoopDepth == 0) { - tmp = rc_match_bgnloop(tmp); - d->ReaderData->AbortOnRead = d->AliveWriteMask; - } else { - struct branch_write_mask * masks = &d->BranchMasks[branch_depth]; - alive_write_mask_at_breaks |= d->AliveWriteMask; - if (masks->HasElse) { - /* Abort on read for components that were written in the IF - * block. */ - abort_on_read_at_break |= - masks->IfWriteMask & ~masks->ElseWriteMask; - /* Abort on read for components that were written in the ELSE - * block. */ - abort_on_read_at_break |= - masks->ElseWriteMask & ~d->AliveWriteMask; - } else { - abort_on_read_at_break |= - masks->IfWriteMask & ~d->AliveWriteMask; - } - } - break; - case RC_OPCODE_IF: - push_branch_mask(d, &branch_depth); - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) { - d->ReaderData->InElse = 1; - } else { - unsigned int temp_mask = d->AliveWriteMask; - d->AliveWriteMask = - d->BranchMasks[branch_depth].IfWriteMask; - d->BranchMasks[branch_depth].ElseWriteMask = - temp_mask; - d->BranchMasks[branch_depth].HasElse = 1; - } - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - d->ReaderData->AbortOnRead = d->AliveWriteMask; - d->ReaderData->InElse = 0; - } - else { - pop_branch_mask(d, &branch_depth); - } - break; - default: - break; - } + for (tmp = writer->Next; tmp != &d->C->Program.Instructions; tmp = tmp->Next) { + rc_opcode opcode = rc_get_flow_control_inst(tmp); + switch (opcode) { + case RC_OPCODE_BGNLOOP: + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ENDLOOP: + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) 
{ + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. These means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e. they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; + } + break; + case RC_OPCODE_BRK: + if (branch_depth == 0 && d->ReaderData->LoopDepth == 0) { + tmp = rc_match_bgnloop(tmp); + d->ReaderData->AbortOnRead = d->AliveWriteMask; + } else { + struct branch_write_mask *masks = &d->BranchMasks[branch_depth]; + alive_write_mask_at_breaks |= d->AliveWriteMask; + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + abort_on_read_at_break |= masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. 
*/ + abort_on_read_at_break |= masks->ElseWriteMask & ~d->AliveWriteMask; + } else { + abort_on_read_at_break |= masks->IfWriteMask & ~d->AliveWriteMask; + } + } + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } else { + pop_branch_mask(d, &branch_depth); + } + break; + default: + break; + } - if (d->ReaderData->InElse) - continue; + if (d->ReaderData->InElse) + continue; - if (tmp->Type == RC_INSTRUCTION_NORMAL) { - rc_for_all_reads_src(tmp, - get_readers_normal_read_callback, d); - } else { - rc_pair_for_all_reads_arg(tmp, - get_readers_pair_read_callback, d); - } + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, get_readers_pair_read_callback, d); + } - /* This can happen when we jump from an ENDLOOP to BGNLOOP */ - if (tmp == writer) { - tmp = endloop; - endloop = NULL; - d->ReaderData->AbortOnRead = abort_on_read_at_endloop - | abort_on_read_at_break; - /* Restore the AliveWriteMask to account for all possible - * exits from the loop. */ - d->AliveWriteMask = alive_write_mask_at_breaks; - alive_write_mask_at_breaks = 0; - continue; - } - rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop | abort_on_read_at_break; + /* Restore the AliveWriteMask to account for all possible + * exits from the loop. 
*/ + d->AliveWriteMask = alive_write_mask_at_breaks; + alive_write_mask_at_breaks = 0; + continue; + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; - /* The check for !endloop in needed for the following scenario: - * - * 0 MOV TEMP[0] none.0 - * 1 BGNLOOP - * 2 IF some exit condition - * 3 BRK - * 4 ENDIF - * 5 ADD TEMP[0], TEMP[0], CONST[0] - * 6 ADD TEMP[0], TEMP[0], none.1 - * 7 ENDLOOP - * 8 MOV OUT[0] TEMP[0] - * - * When we search for the readers of instruction 6, we encounter the ENDLOOP - * and continue searching at BGNLOOP. At instruction 5 the AliveWriteMask - * becomes 0 and we would stop the search. However we still need to continue - * back to 6 from which we jump after the endloop, restore the AliveWriteMask - * according to the possible states at breaks and continue after the loop. - */ - if (branch_depth == 0 && !d->AliveWriteMask && !endloop) - return; - } + /* The check for !endloop in needed for the following scenario: + * + * 0 MOV TEMP[0] none.0 + * 1 BGNLOOP + * 2 IF some exit condition + * 3 BRK + * 4 ENDIF + * 5 ADD TEMP[0], TEMP[0], CONST[0] + * 6 ADD TEMP[0], TEMP[0], none.1 + * 7 ENDLOOP + * 8 MOV OUT[0] TEMP[0] + * + * When we search for the readers of instruction 6, we encounter the ENDLOOP + * and continue searching at BGNLOOP. At instruction 5 the AliveWriteMask + * becomes 0 and we would stop the search. However we still need to continue + * back to 6 from which we jump after the endloop, restore the AliveWriteMask + * according to the possible states at breaks and continue after the loop. 
+ */ + if (branch_depth == 0 && !d->AliveWriteMask && !endloop) + return; + } } -static void init_get_readers_callback_data( - struct get_readers_callback_data * d, - struct rc_reader_data * reader_data, - struct radeon_compiler * c, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) +static void +init_get_readers_callback_data(struct get_readers_callback_data *d, + struct rc_reader_data *reader_data, struct radeon_compiler *c, + rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) { - reader_data->C = c; - reader_data->Abort = 0; - reader_data->ReaderCount = 0; - reader_data->ReadersReserved = 0; - reader_data->Readers = NULL; + reader_data->C = c; + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; - d->C = c; - d->ReaderData = reader_data; - d->ReadNormalCB = read_normal_cb; - d->ReadPairCB = read_pair_cb; - d->WriteCB = write_cb; + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; } /** @@ -871,38 +807,30 @@ static void init_get_readers_callback_data( * @param write_cb This function will be called for every instruction after * writer. 
*/ -void rc_get_readers( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) +void +rc_get_readers(struct radeon_compiler *c, struct rc_instruction *writer, + struct rc_reader_data *data, rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { - struct get_readers_callback_data d; + struct get_readers_callback_data d; - init_get_readers_callback_data(&d, data, c, read_normal_cb, - read_pair_cb, write_cb); + init_get_readers_callback_data(&d, data, c, read_normal_cb, read_pair_cb, write_cb); - rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } -void rc_get_readers_sub( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_pair_sub_instruction * sub_writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) +void +rc_get_readers_sub(struct radeon_compiler *c, struct rc_instruction *writer, + struct rc_pair_sub_instruction *sub_writer, struct rc_reader_data *data, + rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) { - struct get_readers_callback_data d; + struct get_readers_callback_data d; - init_get_readers_callback_data(&d, data, c, read_normal_cb, - read_pair_cb, write_cb); + init_get_readers_callback_data(&d, data, c, read_normal_cb, read_pair_cb, write_cb); - if (sub_writer->WriteMask) { - get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, - sub_writer->DestIndex, sub_writer->WriteMask); - } + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, sub_writer->DestIndex, + sub_writer->WriteMask); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h 
b/src/gallium/drivers/r300/compiler/radeon_dataflow.h index 29ba9a8b370..bb15c6f8136 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -18,98 +18,88 @@ struct rc_pair_instruction_source; struct rc_pair_sub_instruction; struct rc_compiler; - /** * Help analyze and modify the register accesses of instructions. */ /*@{*/ -typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); -void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +typedef void (*rc_read_write_chan_fn)(void *userdata, struct rc_instruction *inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata); +void rc_for_all_writes_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata); -typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask); -void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); -void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +typedef void (*rc_read_write_mask_fn)(void *userdata, struct rc_instruction *inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata); +void rc_for_all_writes_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata); -typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, - struct rc_src_register * src); -void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, - 
void * userdata); +typedef void (*rc_read_src_fn)(void *userdata, struct rc_instruction *inst, + struct rc_src_register *src); +void rc_for_all_reads_src(struct rc_instruction *inst, rc_read_src_fn cb, void *userdata); -typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src); -void rc_pair_for_all_reads_arg(struct rc_instruction * inst, - rc_pair_read_arg_fn cb, void * userdata); +typedef void (*rc_pair_read_arg_fn)(void *userdata, struct rc_instruction *inst, + struct rc_pair_instruction_arg *arg, + struct rc_pair_instruction_source *src); +void rc_pair_for_all_reads_arg(struct rc_instruction *inst, rc_pair_read_arg_fn cb, void *userdata); -typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file * pfile, unsigned int * pindex); -void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +typedef void (*rc_remap_register_fn)(void *userdata, struct rc_instruction *inst, + rc_register_file *pfile, unsigned int *pindex); +void rc_remap_registers(struct rc_instruction *inst, rc_remap_register_fn cb, void *userdata); /*@}*/ struct rc_reader { - struct rc_instruction * Inst; - unsigned int WriteMask; - union { - struct { - struct rc_src_register * Src; - } I; - struct { - struct rc_pair_instruction_arg * Arg; - struct rc_pair_instruction_source * Src; - } P; - } U; + struct rc_instruction *Inst; + unsigned int WriteMask; + union { + struct { + struct rc_src_register *Src; + } I; + struct { + struct rc_pair_instruction_arg *Arg; + struct rc_pair_instruction_source *Src; + } P; + } U; }; struct rc_reader_data { - struct radeon_compiler * C; + struct radeon_compiler *C; - unsigned int Abort; - unsigned int AbortOnRead; - unsigned int AbortOnWrite; - unsigned int LoopDepth; - unsigned int InElse; - struct rc_instruction * Writer; + unsigned int Abort; + unsigned int AbortOnRead; + 
unsigned int AbortOnWrite; + unsigned int LoopDepth; + unsigned int InElse; + struct rc_instruction *Writer; - unsigned int ReaderCount; - unsigned int ReadersReserved; - struct rc_reader * Readers; + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader *Readers; - /* If this flag is enabled, rc_get_readers will exit as soon possible - * after the Abort flag is set.*/ - unsigned int ExitOnAbort; - void * CbData; + /* If this flag is enabled, rc_get_readers will exit as soon possible + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; + void *CbData; }; -void rc_get_readers( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb); +void rc_get_readers(struct radeon_compiler *c, struct rc_instruction *writer, + struct rc_reader_data *data, rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); -void rc_get_readers_sub( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_pair_sub_instruction * sub_writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb); +void rc_get_readers_sub(struct radeon_compiler *c, struct rc_instruction *writer, + struct rc_pair_sub_instruction *sub_writer, struct rc_reader_data *data, + rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); /** * Compiler passes based on dataflow analysis. 
*/ /*@{*/ -typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, - void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); -void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); -void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); +typedef void (*rc_dataflow_mark_outputs_fn)(void *userdata, void *data, + void (*mark_fn)(void *data, unsigned int index, + unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler *c, void *user); +void rc_dataflow_swizzles(struct radeon_compiler *c, void *user); /*@}*/ -void rc_optimize(struct radeon_compiler * c, void *user); +void rc_optimize(struct radeon_compiler *c, void *user); void rc_inline_literals(struct radeon_compiler *c, void *user); int rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data); diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c index ce193ce1ce7..2047f87318e 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c @@ -7,323 +7,319 @@ #include "radeon_compiler.h" - struct updatemask_state { - unsigned char Output[RC_REGISTER_MAX_INDEX]; - unsigned char Temporary[RC_REGISTER_MAX_INDEX]; - unsigned char Address; - unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; }; struct instruction_state { - unsigned char WriteMask:4; - unsigned char WriteALUResult:1; - unsigned char SrcReg[3]; + unsigned char WriteMask : 4; + unsigned char WriteALUResult : 1; + unsigned char SrcReg[3]; }; struct loopinfo { - struct updatemask_state StoreEndloop; - unsigned int BreakCount; - unsigned int BreaksReserved; + struct updatemask_state StoreEndloop; + unsigned int BreakCount; + unsigned int 
BreaksReserved; }; struct branchinfo { - unsigned int HaveElse:1; + unsigned int HaveElse : 1; - struct updatemask_state StoreEndif; - struct updatemask_state StoreElse; + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; }; struct deadcode_state { - struct radeon_compiler * C; - struct instruction_state * Instructions; + struct radeon_compiler *C; + struct instruction_state *Instructions; - struct updatemask_state R; + struct updatemask_state R; - struct branchinfo * BranchStack; - unsigned int BranchStackSize; - unsigned int BranchStackReserved; + struct branchinfo *BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; - struct loopinfo * LoopStack; - unsigned int LoopStackSize; - unsigned int LoopStackReserved; + struct loopinfo *LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; }; - -static void or_updatemasks( - struct updatemask_state * dst, - struct updatemask_state * a, - struct updatemask_state * b) +static void +or_updatemasks(struct updatemask_state *dst, struct updatemask_state *a, struct updatemask_state *b) { - for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { - dst->Output[i] = a->Output[i] | b->Output[i]; - dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; - } + for (unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } - for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) - dst->Special[i] = a->Special[i] | b->Special[i]; + for (unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; - dst->Address = a->Address | b->Address; + dst->Address = a->Address | b->Address; } -static void push_loop(struct deadcode_state * s) +static void +push_loop(struct deadcode_state *s) { - memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, - s->LoopStackSize, s->LoopStackReserved, 1); - 
memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); - memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R)); + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, s->LoopStackSize, + s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); + memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R)); } -static void push_branch(struct deadcode_state * s) +static void +push_branch(struct deadcode_state *s) { - struct branchinfo * branch; + struct branchinfo *branch; - memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, - s->BranchStackSize, s->BranchStackReserved, 1); + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, s->BranchStackSize, + s->BranchStackReserved, 1); - branch = &s->BranchStack[s->BranchStackSize++]; - branch->HaveElse = 0; - memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); + branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); } -static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +static unsigned char * +get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) { - if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { - if (index >= RC_REGISTER_MAX_INDEX) { - rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file); - return NULL; - } + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file); + return NULL; + } - if (file == RC_FILE_OUTPUT) - return &s->R.Output[index]; - else - return &s->R.Temporary[index]; - } else if (file == RC_FILE_ADDRESS) { - return &s->R.Address; - } else if (file == RC_FILE_SPECIAL) { - if (index >= RC_NUM_SPECIAL_REGISTERS) { - rc_error(s->C, "%s: 
special file index %i out of bounds\n", __func__, index); - return NULL; - } + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index); + return NULL; + } - return &s->R.Special[index]; - } + return &s->R.Special[index]; + } - return NULL; + return NULL; } -static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +static void +mark_used(struct deadcode_state *s, rc_register_file file, unsigned int index, unsigned int mask) { - unsigned char * pused = get_used_ptr(s, file, index); - if (pused) - *pused |= mask; + unsigned char *pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; } -static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +static void +update_instruction(struct deadcode_state *s, struct rc_instruction *inst) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - struct instruction_state * insts = &s->Instructions[inst->IP]; - unsigned int usedmask = 0; - unsigned int srcmasks[3]; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state *insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + unsigned int srcmasks[3]; - if (opcode->HasDstReg) { - unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); - if (pused) { - usedmask = *pused & inst->U.I.DstReg.WriteMask; - *pused &= ~usedmask; - } - } + if (opcode->HasDstReg) { + unsigned char *pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } - insts->WriteMask |= usedmask; + insts->WriteMask |= usedmask; - if 
(inst->U.I.WriteALUResult) { - unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); - if (pused && *pused) { - if (inst->U.I.WriteALUResult == RC_ALURESULT_X) - usedmask |= RC_MASK_X; - else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) - usedmask |= RC_MASK_W; + if (inst->U.I.WriteALUResult) { + unsigned char *pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; - *pused = 0; - insts->WriteALUResult = 1; - } - } + *pused = 0; + insts->WriteALUResult = 1; + } + } - rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - unsigned int refmask = 0; - unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; - insts->SrcReg[src] |= newsrcmask; + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(newsrcmask, chan)) - refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); - } + for (unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } - /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ - refmask &= RC_MASK_XYZW; + /* get rid of spurious bits from ZERO, ONE, etc. 
swizzles */ + refmask &= RC_MASK_XYZW; - if (!refmask) - continue; + if (!refmask) + continue; - mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); - if (inst->U.I.SrcReg[src].RelAddr) - mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); - } + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } } -void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) +void +rc_dataflow_deadcode(struct radeon_compiler *c, void *user) { - struct deadcode_state s; - unsigned int nr_instructions; - unsigned int ip; + struct deadcode_state s; + unsigned int nr_instructions; + unsigned int ip; - memset(&s, 0, sizeof(s)); - s.C = c; + memset(&s, 0, sizeof(s)); + s.C = c; - nr_instructions = rc_recompute_ips(c); - s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); - memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + nr_instructions = rc_recompute_ips(c); + s.Instructions = + memory_pool_malloc(&c->Pool, sizeof(struct instruction_state) * nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state) * nr_instructions); - for(struct rc_instruction * inst = c->Program.Instructions.Prev; - inst != &c->Program.Instructions; - inst = inst->Prev) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (struct rc_instruction *inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; inst = inst->Prev) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - /* Assume all output regs are live. Anything else should have been - * eliminated before it got to us. - */ - if (opcode->HasDstReg) - mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask); + /* Assume all output regs are live. Anything else should have been + * eliminated before it got to us. 
+ */ + if (opcode->HasDstReg) + mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask); - switch(opcode->Opcode){ - /* Mark all sources in the loop body as used before doing - * normal deadcode analysis. This is probably not optimal. - * Save this pessimistic deadcode state and restore it anytime - * we see a break just to be extra sure. - */ - case RC_OPCODE_ENDLOOP: - { - int endloops = 1; - struct rc_instruction *ptr; - for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ - opcode = rc_get_opcode_info(ptr->U.I.Opcode); - if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - endloops--; - continue; - } - if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ - endloops++; - continue; - } - if(opcode->HasDstReg){ - int src = 0; - unsigned int srcmasks[3]; - unsigned int writemask = ptr->U.I.DstReg.WriteMask; - if (ptr->U.I.WriteALUResult == RC_ALURESULT_X) - writemask |= RC_MASK_X; - else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W) - writemask |= RC_MASK_W; + switch (opcode->Opcode) { + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + * Save this pessimistic deadcode state and restore it anytime + * we see a break just to be extra sure. 
+ */ + case RC_OPCODE_ENDLOOP: { + int endloops = 1; + struct rc_instruction *ptr; + for (ptr = inst->Prev; endloops > 0; ptr = ptr->Prev) { + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops--; + continue; + } + if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops++; + continue; + } + if (opcode->HasDstReg) { + int src = 0; + unsigned int srcmasks[3]; + unsigned int writemask = ptr->U.I.DstReg.WriteMask; + if (ptr->U.I.WriteALUResult == RC_ALURESULT_X) + writemask |= RC_MASK_X; + else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W) + writemask |= RC_MASK_W; - rc_compute_sources_for_writemask(ptr, writemask, srcmasks); - for(src=0; src < opcode->NumSrcRegs; src++){ - mark_used(&s, - ptr->U.I.SrcReg[src].File, - ptr->U.I.SrcReg[src].Index, - srcmasks[src]); - } - } - } - push_loop(&s); - break; - } - case RC_OPCODE_BRK: - { - struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; - memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R)); - break; - } - case RC_OPCODE_BGNLOOP: - s.LoopStackSize--; - break; - case RC_OPCODE_CONT: - break; - case RC_OPCODE_ENDIF: - push_branch(&s); - break; - default: - if (opcode->IsFlowControl && s.BranchStackSize) { - struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; - if (opcode->Opcode == RC_OPCODE_IF) { - or_updatemasks(&s.R, - &s.R, - branch->HaveElse ? 
&branch->StoreElse : &branch->StoreEndif); + rc_compute_sources_for_writemask(ptr, writemask, srcmasks); + for (src = 0; src < opcode->NumSrcRegs; src++) { + mark_used(&s, ptr->U.I.SrcReg[src].File, ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + push_loop(&s); + break; + } + case RC_OPCODE_BRK: { + struct loopinfo *loop = &s.LoopStack[s.LoopStackSize - 1]; + memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R)); + break; + } + case RC_OPCODE_BGNLOOP: + s.LoopStackSize--; + break; + case RC_OPCODE_CONT: + break; + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo *branch = &s.BranchStack[s.BranchStackSize - 1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); - s.BranchStackSize--; - } else if (opcode->Opcode == RC_OPCODE_ELSE) { - if (branch->HaveElse) { - rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__); - } else { - memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); - memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); - branch->HaveElse = 1; - } - } else { - rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name); - } - } - } + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name); + } + } + } - update_instruction(&s, inst); - } + update_instruction(&s, inst); + } - ip = 0; - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next, ++ip) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - int dead = 1; - unsigned int srcmasks[3]; - 
unsigned int usemask; + ip = 0; + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next, ++ip) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; - if (!opcode->HasDstReg) { - dead = 0; - } else { - inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; - if (s.Instructions[ip].WriteMask) - dead = 0; + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; - if (s.Instructions[ip].WriteALUResult) - dead = 0; - else - inst->U.I.WriteALUResult = RC_ALURESULT_NONE; - } + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } - if (dead) { - struct rc_instruction * todelete = inst; - inst = inst->Prev; - rc_remove_instruction(todelete); - continue; - } + if (dead) { + struct rc_instruction *todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } - usemask = s.Instructions[ip].WriteMask; + usemask = s.Instructions[ip].WriteMask; - if (inst->U.I.WriteALUResult == RC_ALURESULT_X) - usemask |= RC_MASK_X; - else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) - usemask |= RC_MASK_W; + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; - rc_compute_sources_for_writemask(inst, usemask, srcmasks); + rc_compute_sources_for_writemask(inst, usemask, srcmasks); - for(unsigned int src = 0; src < 3; ++src) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(srcmasks[src], chan)) - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); - } - } - } + for (unsigned int src = 0; src < 3; ++src) { + for (unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + 
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } - rc_calculate_inputs_outputs(c); + rc_calculate_inputs_outputs(c); } diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c index 192d83d606b..d30d15e4af2 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -14,77 +14,76 @@ #include "radeon_compiler_util.h" #include "radeon_swizzle.h" -static unsigned int get_swizzle_split(struct radeon_compiler * c, - struct rc_swizzle_split * split, struct rc_instruction * inst, - unsigned src, unsigned * usemask) +static unsigned int +get_swizzle_split(struct radeon_compiler *c, struct rc_swizzle_split *split, + struct rc_instruction *inst, unsigned src, unsigned *usemask) { - *usemask = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) - *usemask |= 1 << chan; - } + *usemask = 0; + for (unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + *usemask |= 1 << chan; + } - c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split); - return split->NumPhases; + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split); + return split->NumPhases; } -static void rewrite_source(struct radeon_compiler * c, - struct rc_instruction * inst, unsigned src) +static void +rewrite_source(struct radeon_compiler *c, struct rc_instruction *inst, unsigned src) { - struct rc_swizzle_split split; - unsigned int tempreg = rc_find_free_temporary(c); - unsigned int usemask; + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; - get_swizzle_split(c, &split, inst, src, &usemask); + get_swizzle_split(c, &split, inst, src, &usemask); - for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { - struct 
rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); - unsigned int masked_negate; + for (unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction *mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int masked_negate; - mov->U.I.Opcode = RC_OPCODE_MOV; - mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - mov->U.I.DstReg.Index = tempreg; - mov->U.I.DstReg.WriteMask = split.Phase[phase]; - mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; - mov->U.I.PreSub = inst->U.I.PreSub; + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; - /* RC_OPCODE_KIL will trigger if the value is -0 and TEX srcs don't have negate - * so considering something like this pattern - * 0: ADD temp[1].x, input[0].w___, const[0].-x___; - * 1: CMP temp[2].x, temp[1].x___, none.1___, none.0___; - * 2: KIL -temp[2].xxxx; - * we don't want to insert MOV, because HW docs advise we tranlate MOV to MAX - * (with RC_OPCODE_DISABLE) and this in turn will mean the KIL will always - * trigger (as it will have either -1 or -0). So emit here ADD src0 + 0 instead. - */ - if (inst->U.I.Opcode == RC_OPCODE_KIL) { - assert(!phase); - mov->U.I.Opcode = RC_OPCODE_ADD; - mov->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; - } + /* RC_OPCODE_KIL will trigger if the value is -0 and TEX srcs don't have negate + * so considering something like this pattern + * 0: ADD temp[1].x, input[0].w___, const[0].-x___; + * 1: CMP temp[2].x, temp[1].x___, none.1___, none.0___; + * 2: KIL -temp[2].xxxx; + * we don't want to insert MOV, because HW docs advise we tranlate MOV to MAX + * (with RC_OPCODE_DISABLE) and this in turn will mean the KIL will always + * trigger (as it will have either -1 or -0). So emit here ADD src0 + 0 instead. 
+ */ + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + assert(!phase); + mov->U.I.Opcode = RC_OPCODE_ADD; + mov->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; + } - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(split.Phase[phase], chan)) - SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); - } + for (unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + } - masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; - if (masked_negate == 0) - mov->U.I.SrcReg[0].Negate = 0; - else if (masked_negate == split.Phase[phase]) - mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + } - } - - inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[src].Index = tempreg; - inst->U.I.SrcReg[src].Swizzle = 0; - inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; - inst->U.I.SrcReg[src].Abs = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, - GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); - } + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for (unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } } /** @@ -92,494 +91,481 @@ static void rewrite_source(struct radeon_compiler * c, * immediate registers by rearranging the immediates to allow the * instruction to use native swizzles. 
*/ -static unsigned try_rewrite_constant(struct radeon_compiler *c, - struct rc_src_register *reg) +static unsigned +try_rewrite_constant(struct radeon_compiler *c, struct rc_src_register *reg) { - unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz; - unsigned all_inline = 0; - bool w_inline_constant = false; - float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz; + unsigned all_inline = 0; + bool w_inline_constant = false; + float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) { - /* The register does not contain immediates, but if all - * the swizzles are inline constants, we can still rewrite - * it. */ + if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) { + /* The register does not contain immediates, but if all + * the swizzles are inline constants, we can still rewrite + * it. */ - new_swizzle = RC_SWIZZLE_XYZW; - for (chan = 0 ; chan < 4; chan++) { - unsigned swz = GET_SWZ(reg->Swizzle, chan); - if (swz <= RC_SWIZZLE_W) { - return 0; - } - if (swz == RC_SWIZZLE_UNUSED) { - SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED); - } - } - all_inline = 1; - } else { - new_swizzle = reg->Swizzle; - } + new_swizzle = RC_SWIZZLE_XYZW; + for (chan = 0; chan < 4; chan++) { + unsigned swz = GET_SWZ(reg->Swizzle, chan); + if (swz <= RC_SWIZZLE_W) { + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) { + SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + all_inline = 1; + } else { + new_swizzle = reg->Swizzle; + } - swz = RC_SWIZZLE_UNUSED; - found_swizzle = 1; - /* Check if all channels have the same swizzle. If they do we can skip - * the search for a native swizzle. We only need to check the first - * three channels, because any swizzle is legal in the fourth channel. 
- */ - for (chan = 0; chan < 3; chan++) { - unsigned chan_swz = GET_SWZ(reg->Swizzle, chan); - if (chan_swz == RC_SWIZZLE_UNUSED) { - continue; - } - if (swz == RC_SWIZZLE_UNUSED) { - swz = chan_swz; - } else if (swz != chan_swz) { - found_swizzle = 0; - break; - } - } + swz = RC_SWIZZLE_UNUSED; + found_swizzle = 1; + /* Check if all channels have the same swizzle. If they do we can skip + * the search for a native swizzle. We only need to check the first + * three channels, because any swizzle is legal in the fourth channel. + */ + for (chan = 0; chan < 3; chan++) { + unsigned chan_swz = GET_SWZ(reg->Swizzle, chan); + if (chan_swz == RC_SWIZZLE_UNUSED) { + continue; + } + if (swz == RC_SWIZZLE_UNUSED) { + swz = chan_swz; + } else if (swz != chan_swz) { + found_swizzle = 0; + break; + } + } - /* Find a legal swizzle */ + /* Find a legal swizzle */ - /* This loop attempts to find a native swizzle where all the - * channels are different. */ - while (!found_swizzle && !all_inline) { - swz0 = GET_SWZ(new_swizzle, 0); - swz1 = GET_SWZ(new_swizzle, 1); - swz2 = GET_SWZ(new_swizzle, 2); + /* This loop attempts to find a native swizzle where all the + * channels are different. */ + while (!found_swizzle && !all_inline) { + swz0 = GET_SWZ(new_swizzle, 0); + swz1 = GET_SWZ(new_swizzle, 1); + swz2 = GET_SWZ(new_swizzle, 2); - /* Swizzle .W. is never legal. */ - if (swz1 == RC_SWIZZLE_W || - swz1 == RC_SWIZZLE_UNUSED || - swz1 == RC_SWIZZLE_ZERO || - swz1 == RC_SWIZZLE_HALF || - swz1 == RC_SWIZZLE_ONE) { - /* We chose Z, because there are two non-repeating - * swizzle combinations of the form .Z. There are - * only one combination each for .X. and .Y. */ - SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); - continue; - } + /* Swizzle .W. is never legal. */ + if (swz1 == RC_SWIZZLE_W || swz1 == RC_SWIZZLE_UNUSED || swz1 == RC_SWIZZLE_ZERO || + swz1 == RC_SWIZZLE_HALF || swz1 == RC_SWIZZLE_ONE) { + /* We chose Z, because there are two non-repeating + * swizzle combinations of the form .Z. 
There are + * only one combination each for .X. and .Y. */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + continue; + } - if (swz2 == RC_SWIZZLE_UNUSED) { - /* We choose Y, because there are two non-repeating - * swizzle combinations of the form ..Y */ - SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); - continue; - } + if (swz2 == RC_SWIZZLE_UNUSED) { + /* We choose Y, because there are two non-repeating + * swizzle combinations of the form ..Y */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + continue; + } - switch (swz0) { - /* X.. */ - case RC_SWIZZLE_X: - /* Legal swizzles that start with X: XYZ, XXX */ - switch (swz1) { - /* XX. */ - case RC_SWIZZLE_X: - /* The new swizzle will be: - * ZXY (XX. => ZX. => ZXY) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); - break; - /* XY. */ - case RC_SWIZZLE_Y: - /* The new swizzle is XYZ */ - SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z); - found_swizzle = 1; - break; - /* XZ. */ - case RC_SWIZZLE_Z: - /* XZZ */ - if (swz2 == RC_SWIZZLE_Z) { - /* The new swizzle is XYZ */ - SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y); - found_swizzle = 1; - } else { /* XZ[^Z] */ - /* The new swizzle will be: - * YZX (XZ. => YZ. => YZX) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y); - } - break; - /* XW. Should have already been handled. */ - case RC_SWIZZLE_W: - assert(0); - break; - } - break; - /* Y.. */ - case RC_SWIZZLE_Y: - /* Legal swizzles that start with Y: YYY, YZX */ - switch (swz1) { - /* YY. */ - case RC_SWIZZLE_Y: - /* The new swizzle will be: - * XYZ (YY. => XY. => XYZ) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); - break; - /* YZ. */ - case RC_SWIZZLE_Z: - /* The new swizzle is YZX */ - SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X); - found_swizzle = 1; - break; - /* YX. */ - case RC_SWIZZLE_X: - /* YXX */ - if (swz2 == RC_SWIZZLE_X) { - /*The new swizzle is YZX */ - SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); - found_swizzle = 1; - } else { /* YX[^X] */ - /* The new swizzle will be: - * ZXY (YX. => ZX. -> ZXY) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); - } - break; - /* YW. 
Should have already been handled. */ - case RC_SWIZZLE_W: - assert(0); - break; - } - break; - /* Z.. */ - case RC_SWIZZLE_Z: - /* Legal swizzles that start with Z: ZZZ, ZXY */ - switch (swz1) { - /* ZZ. */ - case RC_SWIZZLE_Z: - /* The new swizzle will be: - * WZY (ZZ. => WZ. => WZY) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W); - break; - /* ZX. */ - case RC_SWIZZLE_X: - /* The new swizzle is ZXY */ - SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); - found_swizzle = 1; - break; - /* ZY. */ - case RC_SWIZZLE_Y: - /* ZYY */ - if (swz2 == RC_SWIZZLE_Y) { - /* The new swizzle is ZXY */ - SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X); - found_swizzle = 1; - } else { /* ZY[^Y] */ - /* The new swizzle will be: - * XYZ (ZY. => XY. => XYZ) */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); - } - break; - /* ZW. Should have already been handled. */ - case RC_SWIZZLE_W: - assert(0); - break; - } - break; + switch (swz0) { + /* X.. */ + case RC_SWIZZLE_X: + /* Legal swizzles that start with X: XYZ, XXX */ + switch (swz1) { + /* XX. */ + case RC_SWIZZLE_X: + /* The new swizzle will be: + * ZXY (XX. => ZX. => ZXY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); + break; + /* XY. */ + case RC_SWIZZLE_Y: + /* The new swizzle is XYZ */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z); + found_swizzle = 1; + break; + /* XZ. */ + case RC_SWIZZLE_Z: + /* XZZ */ + if (swz2 == RC_SWIZZLE_Z) { + /* The new swizzle is XYZ */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y); + found_swizzle = 1; + } else { /* XZ[^Z] */ + /* The new swizzle will be: + * YZX (XZ. => YZ. => YZX) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y); + } + break; + /* XW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; + /* Y.. */ + case RC_SWIZZLE_Y: + /* Legal swizzles that start with Y: YYY, YZX */ + switch (swz1) { + /* YY. */ + case RC_SWIZZLE_Y: + /* The new swizzle will be: + * XYZ (YY. => XY. => XYZ) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + break; + /* YZ. 
*/ + case RC_SWIZZLE_Z: + /* The new swizzle is YZX */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X); + found_swizzle = 1; + break; + /* YX. */ + case RC_SWIZZLE_X: + /* YXX */ + if (swz2 == RC_SWIZZLE_X) { + /*The new swizzle is YZX */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + found_swizzle = 1; + } else { /* YX[^X] */ + /* The new swizzle will be: + * ZXY (YX. => ZX. -> ZXY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); + } + break; + /* YW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; + /* Z.. */ + case RC_SWIZZLE_Z: + /* Legal swizzles that start with Z: ZZZ, ZXY */ + switch (swz1) { + /* ZZ. */ + case RC_SWIZZLE_Z: + /* The new swizzle will be: + * WZY (ZZ. => WZ. => WZY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W); + break; + /* ZX. */ + case RC_SWIZZLE_X: + /* The new swizzle is ZXY */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + found_swizzle = 1; + break; + /* ZY. */ + case RC_SWIZZLE_Y: + /* ZYY */ + if (swz2 == RC_SWIZZLE_Y) { + /* The new swizzle is ZXY */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X); + found_swizzle = 1; + } else { /* ZY[^Y] */ + /* The new swizzle will be: + * XYZ (ZY. => XY. => XYZ) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + } + break; + /* ZW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; - /* W.. */ - case RC_SWIZZLE_W: - /* Legal swizzles that start with X: WWW, WZY */ - switch (swz1) { - /* WW. Should have already been handled. */ - case RC_SWIZZLE_W: - assert(0); - break; - /* WZ. */ - case RC_SWIZZLE_Z: - /* The new swizzle will be WZY */ - SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); - found_swizzle = 1; - break; - /* WX. */ - case RC_SWIZZLE_X: - /* WY. */ - case RC_SWIZZLE_Y: - /* W[XY]Y */ - if (swz2 == RC_SWIZZLE_Y) { - /* The new swizzle will be WZY */ - SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); - found_swizzle = 1; - } else { /* W[XY][^Y] */ - /* The new swizzle will be: - * ZXY (WX. => XX. => ZX. => ZXY) or - * XYZ (WY. => XY. 
=> XYZ) - */ - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); - } - break; - } - break; - /* U.. 0.. 1.. H..*/ - case RC_SWIZZLE_UNUSED: - case RC_SWIZZLE_ZERO: - case RC_SWIZZLE_ONE: - case RC_SWIZZLE_HALF: - SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); - break; - } - } + /* W.. */ + case RC_SWIZZLE_W: + /* Legal swizzles that start with X: WWW, WZY */ + switch (swz1) { + /* WW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + /* WZ. */ + case RC_SWIZZLE_Z: + /* The new swizzle will be WZY */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + found_swizzle = 1; + break; + /* WX. */ + case RC_SWIZZLE_X: + /* WY. */ + case RC_SWIZZLE_Y: + /* W[XY]Y */ + if (swz2 == RC_SWIZZLE_Y) { + /* The new swizzle will be WZY */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + found_swizzle = 1; + } else { /* W[XY][^Y] */ + /* The new swizzle will be: + * ZXY (WX. => XX. => ZX. => ZXY) or + * XYZ (WY. => XY. => XYZ) + */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + } + break; + } + break; + /* U.. 0.. 1.. H..*/ + case RC_SWIZZLE_UNUSED: + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_ONE: + case RC_SWIZZLE_HALF: + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + break; + } + } - /* Handle the swizzle in the w channel. */ - swz3 = GET_SWZ(reg->Swizzle, 3); + /* Handle the swizzle in the w channel. */ + swz3 = GET_SWZ(reg->Swizzle, 3); - /* We can skip this if the swizzle in channel w is an inline constant. */ - if (is_swizzle_inline_constant(swz3)) { - w_inline_constant = true; - } else { - for (chan = 0; chan < 3; chan++) { - unsigned old_swz = GET_SWZ(reg->Swizzle, chan); - unsigned new_swz = GET_SWZ(new_swizzle, chan); - /* If the swizzle in the w channel is the same as the - * swizzle in any other channels, we need to rewrite it. - * For example: - * reg->Swizzle == XWZW - * new_swizzle == XYZX - * Since the swizzle in the y channel is being - * rewritten from W -> Y we need to change the swizzle - * in the w channel from W -> Y as well. 
- */ - if (old_swz == swz3) { - SET_SWZ(new_swizzle, 3, - GET_SWZ(new_swizzle, chan)); - break; - } + /* We can skip this if the swizzle in channel w is an inline constant. */ + if (is_swizzle_inline_constant(swz3)) { + w_inline_constant = true; + } else { + for (chan = 0; chan < 3; chan++) { + unsigned old_swz = GET_SWZ(reg->Swizzle, chan); + unsigned new_swz = GET_SWZ(new_swizzle, chan); + /* If the swizzle in the w channel is the same as the + * swizzle in any other channels, we need to rewrite it. + * For example: + * reg->Swizzle == XWZW + * new_swizzle == XYZX + * Since the swizzle in the y channel is being + * rewritten from W -> Y we need to change the swizzle + * in the w channel from W -> Y as well. + */ + if (old_swz == swz3) { + SET_SWZ(new_swizzle, 3, GET_SWZ(new_swizzle, chan)); + break; + } - /* The swizzle in channel w will be overwritten by one - * of the new swizzles. */ - if (new_swz == swz3) { - /* Find an unused swizzle */ - unsigned i; - unsigned used = 0; - for (i = 0; i < 3; i++) { - used |= 1 << GET_SWZ(new_swizzle, i); - } - for (i = 0; i < 4; i++) { - if (used & (1 << i)) { - continue; - } - SET_SWZ(new_swizzle, 3, i); - } - } - } - } + /* The swizzle in channel w will be overwritten by one + * of the new swizzles. 
*/ + if (new_swz == swz3) { + /* Find an unused swizzle */ + unsigned i; + unsigned used = 0; + for (i = 0; i < 3; i++) { + used |= 1 << GET_SWZ(new_swizzle, i); + } + for (i = 0; i < 4; i++) { + if (used & (1 << i)) { + continue; + } + SET_SWZ(new_swizzle, 3, i); + } + } + } + } - for (chan = 0; chan < 4; chan++) { - unsigned old_swz = GET_SWZ(reg->Swizzle, chan); - unsigned new_swz = GET_SWZ(new_swizzle, chan); + for (chan = 0; chan < 4; chan++) { + unsigned old_swz = GET_SWZ(reg->Swizzle, chan); + unsigned new_swz = GET_SWZ(new_swizzle, chan); - if (old_swz == RC_SWIZZLE_UNUSED) { - continue; - } + if (old_swz == RC_SWIZZLE_UNUSED) { + continue; + } - /* We don't need to change the swizzle in channel w if it is - * an inline constant. These are always legal in the w channel. - * - * Swizzles with a value > RC_SWIZZLE_W are inline constants. - */ - if (chan == 3 && w_inline_constant) { - continue; - } + /* We don't need to change the swizzle in channel w if it is + * an inline constant. These are always legal in the w channel. + * + * Swizzles with a value > RC_SWIZZLE_W are inline constants. 
+ */ + if (chan == 3 && w_inline_constant) { + continue; + } - if (new_swz > RC_SWIZZLE_W) { - rc_error(c, "Bad swizzle in try_rewrite_constant()"); - new_swz = RC_SWIZZLE_X; - } + if (new_swz > RC_SWIZZLE_W) { + rc_error(c, "Bad swizzle in try_rewrite_constant()"); + new_swz = RC_SWIZZLE_X; + } - switch (old_swz) { - case RC_SWIZZLE_ZERO: - imms[new_swz] = 0.0f; - break; - case RC_SWIZZLE_HALF: - if (reg->Negate & (1 << chan)) { - imms[new_swz] = -0.5f; - } else { - imms[new_swz] = 0.5f; - } - break; - case RC_SWIZZLE_ONE: - if (reg->Negate & (1 << chan)) { - imms[new_swz] = -1.0f; - } else { - imms[new_swz] = 1.0f; - } - break; - default: - imms[new_swz] = rc_get_constant_value(c, reg->Index, - reg->Swizzle, reg->Negate, chan); - } - SET_SWZ(reg->Swizzle, chan, new_swz); - } - reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, - imms); - /* We need to set the register file to CONSTANT in case we are - * converting a non-constant register with constant swizzles (e.g. - * ONE, ZERO, HALF). - */ - reg->File = RC_FILE_CONSTANT; - reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0; - return 1; + switch (old_swz) { + case RC_SWIZZLE_ZERO: + imms[new_swz] = 0.0f; + break; + case RC_SWIZZLE_HALF: + if (reg->Negate & (1 << chan)) { + imms[new_swz] = -0.5f; + } else { + imms[new_swz] = 0.5f; + } + break; + case RC_SWIZZLE_ONE: + if (reg->Negate & (1 << chan)) { + imms[new_swz] = -1.0f; + } else { + imms[new_swz] = 1.0f; + } + break; + default: + imms[new_swz] = rc_get_constant_value(c, reg->Index, reg->Swizzle, reg->Negate, chan); + } + SET_SWZ(reg->Swizzle, chan, new_swz); + } + reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, imms); + /* We need to set the register file to CONSTANT in case we are + * converting a non-constant register with constant swizzles (e.g. + * ONE, ZERO, HALF). + */ + reg->File = RC_FILE_CONSTANT; + reg->Negate = w_inline_constant ? 
reg->Negate & (1 << 3) : 0; + return 1; } /** * Set all channels not specified by writemaks to unused. */ -static void clear_channels(struct rc_instruction * inst, unsigned writemask) +static void +clear_channels(struct rc_instruction *inst, unsigned writemask) { - inst->U.I.DstReg.WriteMask = writemask; - for (unsigned chan = 0; chan < 4; chan++) { - if (writemask & (1 << chan)) - continue; + inst->U.I.DstReg.WriteMask = writemask; + for (unsigned chan = 0; chan < 4; chan++) { + if (writemask & (1 << chan)) + continue; - const struct rc_opcode_info * opcode = - rc_get_opcode_info(inst->U.I.Opcode); - for (unsigned src = 0; src < opcode->NumSrcRegs; src++) { - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); - } - } - /* TODO: We could in theory add constant swizzles back as well, - * they will be all legal when we have just a single channel, - * to save some sources and help the pair scheduling later. */ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (unsigned src = 0; src < opcode->NumSrcRegs; src++) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + /* TODO: We could in theory add constant swizzles back as well, + * they will be all legal when we have just a single channel, + * to save some sources and help the pair scheduling later. 
*/ } -static bool try_splitting_single_channel(struct radeon_compiler * c, - struct rc_instruction * inst) +static bool +try_splitting_single_channel(struct radeon_compiler *c, struct rc_instruction *inst) { - for (unsigned chan = 0; chan < 3; chan++) { - struct rc_instruction * new_inst; - new_inst = rc_insert_new_instruction(c, inst); - memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction)); - clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan)); + for (unsigned chan = 0; chan < 3; chan++) { + struct rc_instruction *new_inst; + new_inst = rc_insert_new_instruction(c, inst); + memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction)); + clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan)); - const struct rc_opcode_info * opcode = - rc_get_opcode_info(new_inst->U.I.Opcode); - bool valid_swizzles = true; + const struct rc_opcode_info *opcode = rc_get_opcode_info(new_inst->U.I.Opcode); + bool valid_swizzles = true; - for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) { - struct rc_src_register *reg = &new_inst->U.I.SrcReg[src]; + for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_src_register *reg = &new_inst->U.I.SrcReg[src]; - if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg)) - valid_swizzles = false; - } + if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg)) + valid_swizzles = false; + } - if (!valid_swizzles) { - rc_remove_instruction(new_inst); - } else { - clear_channels(inst, 1 << chan); - return true; - } - } - return false; + if (!valid_swizzles) { + rc_remove_instruction(new_inst); + } else { + clear_channels(inst, 1 << chan); + return true; + } + } + return false; } -static bool try_splitting_instruction(struct radeon_compiler * c, - struct rc_instruction * inst) +static bool +try_splitting_instruction(struct radeon_compiler *c, struct rc_instruction *inst) { - /* Adding more output instructions in FS is bad for performance. 
*/ - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) - return false; + /* Adding more output instructions in FS is bad for performance. */ + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + return false; - /* When only single channel of the swizzle is wrong, like xwzw, - * it is best to just split the single channel out. - */ - if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW || - inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) { - if (try_splitting_single_channel(c, inst)) - return true; - } + /* When only single channel of the swizzle is wrong, like xwzw, + * it is best to just split the single channel out. + */ + if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW || inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) { + if (try_splitting_single_channel(c, inst)) + return true; + } - for (unsigned chan = 0; chan < 3; chan++) { - if (!(inst->U.I.DstReg.WriteMask & (1 << chan))) - continue; + for (unsigned chan = 0; chan < 3; chan++) { + if (!(inst->U.I.DstReg.WriteMask & (1 << chan))) + continue; - unsigned next_chan; - for (next_chan = chan + 1; next_chan < 4; next_chan++) { - if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan))) - continue; + unsigned next_chan; + for (next_chan = chan + 1; next_chan < 4; next_chan++) { + if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan))) + continue; - /* We don't want to split the last used x/y/z channel and the - * w channel. Pair scheduling might be able to put it back - * together, but we don't trust it that much. - * - * Next is W already, rewrite the original inst and we are done. - */ - if (next_chan == 3) { - clear_channels(inst, (1 << chan) | (1 << next_chan)); - return true; - } + /* We don't want to split the last used x/y/z channel and the + * w channel. Pair scheduling might be able to put it back + * together, but we don't trust it that much. + * + * Next is W already, rewrite the original inst and we are done. 
+ */ + if (next_chan == 3) { + clear_channels(inst, (1 << chan) | (1 << next_chan)); + return true; + } - struct rc_instruction * new_inst; - new_inst = rc_insert_new_instruction(c, inst->Prev); - memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction)); - clear_channels(new_inst, 1 << chan); - break; - } + struct rc_instruction *new_inst; + new_inst = rc_insert_new_instruction(c, inst->Prev); + memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction)); + clear_channels(new_inst, 1 << chan); + break; + } - /* No next chan */ - if (next_chan == 4) { - clear_channels(inst, 1 << chan); - return true; - } - } - assert(0 && "Unreachable\n"); - return false; + /* No next chan */ + if (next_chan == 4) { + clear_channels(inst, 1 << chan); + return true; + } + } + assert(0 && "Unreachable\n"); + return false; } -void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) +void +rc_dataflow_swizzles(struct radeon_compiler *c, void *user) { - struct rc_instruction * inst; + struct rc_instruction *inst; - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * opcode = - rc_get_opcode_info(inst->U.I.Opcode); - unsigned src, usemask; - unsigned total_splits = 0; - struct rc_swizzle_split split; + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned src, usemask; + unsigned total_splits = 0; + struct rc_swizzle_split split; - /* If multiple sources needs splitting or some source needs to split - * too many times, it is actually better to just split the whole ALU - * instruction to separate channels instead of inserting extra movs. - */ - for (src = 0; src < opcode->NumSrcRegs; ++src) { - /* Don't count invalid swizzles from immediates, we can just - * insert new immediates with the correct order later. 
- */ - if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File, - inst->U.I.SrcReg[src].Index) - && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) { - total_splits++; - } else { - total_splits += get_swizzle_split(c, &split, inst, - src, &usemask); - } - } + /* If multiple sources needs splitting or some source needs to split + * too many times, it is actually better to just split the whole ALU + * instruction to separate channels instead of inserting extra movs. + */ + for (src = 0; src < opcode->NumSrcRegs; ++src) { + /* Don't count invalid swizzles from immediates, we can just + * insert new immediates with the correct order later. + */ + if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index) && + c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) { + total_splits++; + } else { + total_splits += get_swizzle_split(c, &split, inst, src, &usemask); + } + } - /* Even if there is only a single split, i.e., two extra movs, this still - * accounts to three instructions, the same as when we split - * the original instruction right away. - */ - if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) { - if (try_splitting_instruction(c, inst)) - continue; - } + /* Even if there is only a single split, i.e., two extra movs, this still + * accounts to three instructions, the same as when we split + * the original instruction right away. + */ + if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) { + if (try_splitting_instruction(c, inst)) + continue; + } - /* For texturing or non-componentwise opcodes we do the old way - * of adding extra movs. 
- */ - for(src = 0; src < opcode->NumSrcRegs; ++src) { - struct rc_src_register *reg = &inst->U.I.SrcReg[src]; - if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { - continue; - } - if (!c->is_r500 && - c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && - (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) && - try_rewrite_constant(c, reg)) { - continue; - } - rewrite_source(c, inst, src); - } - } - if (c->Debug & RC_DBG_LOG) - rc_constants_print(&c->Program.Constants, NULL); + /* For texturing or non-componentwise opcodes we do the old way + * of adding extra movs. + */ + for (src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_src_register *reg = &inst->U.I.SrcReg[src]; + if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { + continue; + } + if (!c->is_r500 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && + (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) && + try_rewrite_constant(c, reg)) { + continue; + } + rewrite_source(c, inst, src); + } + } + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants, NULL); } diff --git a/src/gallium/drivers/r300/compiler/radeon_inline_literals.c b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c index 31f4101e307..25ef0de6cbf 100644 --- a/src/gallium/drivers/r300/compiler/radeon_inline_literals.c +++ b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c @@ -4,18 +4,22 @@ * SPDX-License-Identifier: MIT */ +#include +#include "util/u_bitcast.h" #include "radeon_compiler.h" #include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_constants.h" #include "radeon_swizzle.h" -#include "util/u_bitcast.h" -#include #define VERBOSE 0 -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) +#define DBG(...) 
\ + do { \ + if (VERBOSE) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) /* IEEE-754: * 22:0 mantissa @@ -26,125 +30,120 @@ * 0:2 mantissa * 3:6 exponent (bias 7) */ -static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out) +static int +ieee_754_to_r300_float(float f, unsigned char *r300_float_out) { - unsigned float_bits = u_bitcast_f2u(f); - /* XXX: Handle big-endian */ - unsigned mantissa = float_bits & 0x007fffff; - unsigned biased_exponent = (float_bits & 0x7f800000) >> 23; - unsigned negate = !!(float_bits & 0x80000000); - int exponent = biased_exponent - 127; - unsigned mantissa_mask = 0xff8fffff; - unsigned r300_exponent, r300_mantissa; + unsigned float_bits = u_bitcast_f2u(f); + /* XXX: Handle big-endian */ + unsigned mantissa = float_bits & 0x007fffff; + unsigned biased_exponent = (float_bits & 0x7f800000) >> 23; + unsigned negate = !!(float_bits & 0x80000000); + int exponent = biased_exponent - 127; + unsigned mantissa_mask = 0xff8fffff; + unsigned r300_exponent, r300_mantissa; - DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits); - DBG("Raw exponent = %d\n", exponent); + DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits); + DBG("Raw exponent = %d\n", exponent); - if (exponent < -7 || exponent > 8) { - DBG("Failed exponent out of range\n\n"); - return 0; - } + if (exponent < -7 || exponent > 8) { + DBG("Failed exponent out of range\n\n"); + return 0; + } - if (mantissa & mantissa_mask) { - DBG("Failed mantissa has too many bits:\n" - "mantissa=0x%x mantissa_mask=0x%x, and=0x%x\n\n", - mantissa, mantissa_mask, - mantissa & mantissa_mask); - return 0; - } + if (mantissa & mantissa_mask) { + DBG("Failed mantissa has too many bits:\n" + "mantissa=0x%x mantissa_mask=0x%x, and=0x%x\n\n", + mantissa, mantissa_mask, mantissa & mantissa_mask); + return 0; + } - r300_exponent = exponent + 7; - r300_mantissa = (mantissa & ~mantissa_mask) >> 20; - *r300_float_out = r300_mantissa | (r300_exponent << 3); + r300_exponent = exponent + 7; + 
r300_mantissa = (mantissa & ~mantissa_mask) >> 20; + *r300_float_out = r300_mantissa | (r300_exponent << 3); - DBG("Success! r300_float = 0x%x\n\n", *r300_float_out); + DBG("Success! r300_float = 0x%x\n\n", *r300_float_out); - if (negate) - return -1; - else - return 1; + if (negate) + return -1; + else + return 1; } -void rc_inline_literals(struct radeon_compiler *c, void *user) +void +rc_inline_literals(struct radeon_compiler *c, void *user) { - struct rc_instruction * inst; + struct rc_instruction *inst; - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); - unsigned src_idx; - struct rc_constant * constant; - float float_value; - unsigned char r300_float = 0; - int ret; + unsigned src_idx; + struct rc_constant *constant; + float float_value; + unsigned char r300_float = 0; + int ret; - /* XXX: Handle presub */ + /* XXX: Handle presub */ - /* We aren't using rc_for_all_reads_src here, because presub - * sources need to be handled differently. 
*/ - for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) { - unsigned use_literal = 0; - unsigned swz, chan; - struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx]; - if (src_reg.File != RC_FILE_CONSTANT) { - continue; - } - constant = - &c->Program.Constants.Constants[src_reg.Index]; - if (constant->Type != RC_CONSTANT_IMMEDIATE) { - continue; - } - for (chan = 0; chan < 4; chan++) { - unsigned char r300_float_tmp; - swz = GET_SWZ(src_reg.Swizzle, chan); - if (swz >= RC_SWIZZLE_ZERO) { - continue; - } - float_value = constant->u.Immediate[swz]; - ret = ieee_754_to_r300_float(float_value, - &r300_float_tmp); - if (!ret || (use_literal && - r300_float != r300_float_tmp)) { - use_literal = 0; - break; - } + /* We aren't using rc_for_all_reads_src here, because presub + * sources need to be handled differently. */ + for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) { + unsigned use_literal = 0; + unsigned swz, chan; + struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx]; + if (src_reg.File != RC_FILE_CONSTANT) { + continue; + } + constant = &c->Program.Constants.Constants[src_reg.Index]; + if (constant->Type != RC_CONSTANT_IMMEDIATE) { + continue; + } + for (chan = 0; chan < 4; chan++) { + unsigned char r300_float_tmp; + swz = GET_SWZ(src_reg.Swizzle, chan); + if (swz >= RC_SWIZZLE_ZERO) { + continue; + } + float_value = constant->u.Immediate[swz]; + ret = ieee_754_to_r300_float(float_value, &r300_float_tmp); + if (!ret || (use_literal && r300_float != r300_float_tmp)) { + use_literal = 0; + break; + } - if (ret == -1 && src_reg.Abs) { - use_literal = 0; - break; - } + if (ret == -1 && src_reg.Abs) { + use_literal = 0; + break; + } - if (!use_literal) { - r300_float = r300_float_tmp; - use_literal = 1; - } + if (!use_literal) { + r300_float = r300_float_tmp; + use_literal = 1; + } - /* We can use any swizzle, so if this is ADD it might - * be smart to us the same swizzle as the other src uses - * so that we potentially enable presubtract later. 
- * Use RC_SWIZZLE_W otherwise, so it will become one of - * the alpha sources. - */ - if (info->Opcode == RC_OPCODE_ADD && - GET_SWZ(inst->U.I.SrcReg[1 - src_idx].Swizzle, chan) == chan) { - SET_SWZ(src_reg.Swizzle, chan, chan); - } else { - SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W); - } - if (ret == -1) { - src_reg.Negate ^= (1 << chan); - } - } + /* We can use any swizzle, so if this is ADD it might + * be smart to us the same swizzle as the other src uses + * so that we potentially enable presubtract later. + * Use RC_SWIZZLE_W otherwise, so it will become one of + * the alpha sources. + */ + if (info->Opcode == RC_OPCODE_ADD && + GET_SWZ(inst->U.I.SrcReg[1 - src_idx].Swizzle, chan) == chan) { + SET_SWZ(src_reg.Swizzle, chan, chan); + } else { + SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W); + } + if (ret == -1) { + src_reg.Negate ^= (1 << chan); + } + } - src_reg.File = RC_FILE_INLINE; - src_reg.Index = r300_float; - if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) { - continue; - } - inst->U.I.SrcReg[src_idx] = src_reg; - } - } + src_reg.File = RC_FILE_INLINE; + src_reg.Index = r300_float; + if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) { + continue; + } + inst->U.I.SrcReg[src_idx] = src_reg; + } + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c index 9e95a6bc12d..a869e2d4625 100644 --- a/src/gallium/drivers/r300/compiler/radeon_list.c +++ b/src/gallium/drivers/r300/compiler/radeon_list.c @@ -5,64 +5,70 @@ #include "radeon_list.h" -#include #include +#include #include "memory_pool.h" -struct rc_list * rc_list(struct memory_pool * pool, void * item) +struct rc_list * +rc_list(struct memory_pool *pool, void *item) { - struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); - new->Item = item; - new->Next = NULL; - new->Prev = NULL; + struct rc_list *new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; 
+ new->Next = NULL; + new->Prev = NULL; - return new; + return new; } -void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +void +rc_list_add(struct rc_list **list, struct rc_list *new_value) { - struct rc_list * temp; + struct rc_list *temp; - if (*list == NULL) { - *list = new_value; - return; - } + if (*list == NULL) { + *list = new_value; + return; + } - for (temp = *list; temp->Next; temp = temp->Next); + for (temp = *list; temp->Next; temp = temp->Next) + ; - temp->Next = new_value; - new_value->Prev = temp; + temp->Next = new_value; + new_value->Prev = temp; } -void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +void +rc_list_remove(struct rc_list **list, struct rc_list *rm_value) { - if (*list == rm_value) { - *list = rm_value->Next; - return; - } + if (*list == rm_value) { + *list = rm_value->Next; + return; + } - rm_value->Prev->Next = rm_value->Next; - if (rm_value->Next) { - rm_value->Next->Prev = rm_value->Prev; - } + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } } -unsigned int rc_list_count(struct rc_list * list) +unsigned int +rc_list_count(struct rc_list *list) { - unsigned int count = 0; - while (list) { - count++; - list = list->Next; - } - return count; + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; } -void rc_list_print(struct rc_list * list) +void +rc_list_print(struct rc_list *list) { - while(list) { - fprintf(stderr, "%p->", list->Item); - list = list->Next; - } - fprintf(stderr, "\n"); + while (list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); } diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h index d666397f572..09071ecc95a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_list.h +++ b/src/gallium/drivers/r300/compiler/radeon_list.h @@ -9,16 +9,15 @@ struct memory_pool; struct rc_list 
{ - void * Item; - struct rc_list * Prev; - struct rc_list * Next; + void *Item; + struct rc_list *Prev; + struct rc_list *Next; }; -struct rc_list * rc_list(struct memory_pool * pool, void * item); -void rc_list_add(struct rc_list ** list, struct rc_list * new_value); -void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); -unsigned int rc_list_count(struct rc_list * list); -void rc_list_print(struct rc_list * list); +struct rc_list *rc_list(struct memory_pool *pool, void *item); +void rc_list_add(struct rc_list **list, struct rc_list *new_value); +void rc_list_remove(struct rc_list **list, struct rc_list *rm_value); +unsigned int rc_list_count(struct rc_list *list); +void rc_list_print(struct rc_list *list); #endif /* RADEON_LIST_H */ - diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c index 9d116fe9d4a..c2145959fac 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -11,502 +11,500 @@ #include "util/compiler.h" const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { - { - .Opcode = RC_OPCODE_NOP, - .Name = "NOP" - }, - { - .Opcode = RC_OPCODE_ILLEGAL_OPCODE, - .Name = "ILLEGAL OPCODE" - }, - { - .Opcode = RC_OPCODE_ADD, - .Name = "ADD", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_ARL, - .Name = "ARL", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_ARR, - .Name = "ARR", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_CMP, - .Name = "CMP", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_CND, - .Name = "CND", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_COS, - .Name = "COS", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_DDX, - .Name = "DDX", - .NumSrcRegs = 2, - .HasDstReg = 1, - 
.IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_DDY, - .Name = "DDY", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_DP2, - .Name = "DP2", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DP3, - .Name = "DP3", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DP4, - .Name = "DP4", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DST, - .Name = "DST", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_EX2, - .Name = "EX2", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_EXP, - .Name = "EXP", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_FRC, - .Name = "FRC", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_KIL, - .Name = "KIL", - .NumSrcRegs = 1 - }, - { - .Opcode = RC_OPCODE_LG2, - .Name = "LG2", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_LIT, - .Name = "LIT", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_LOG, - .Name = "LOG", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_MAD, - .Name = "MAD", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MAX, - .Name = "MAX", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MIN, - .Name = "MIN", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MOV, - .Name = "MOV", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MUL, - .Name = "MUL", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_POW, - .Name = "POW", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_RCP, - .Name = "RCP", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_ROUND, 
- .Name = "ROUND", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_RSQ, - .Name = "RSQ", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_SEQ, - .Name = "SEQ", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SGE, - .Name = "SGE", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SIN, - .Name = "SIN", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_SLT, - .Name = "SLT", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SNE, - .Name = "SNE", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_TEX, - .Name = "TEX", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXB, - .Name = "TXB", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXD, - .Name = "TXD", - .HasTexture = 1, - .NumSrcRegs = 3, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXL, - .Name = "TXL", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXP, - .Name = "TXP", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_IF, - .Name = "IF", - .IsFlowControl = 1, - .NumSrcRegs = 1 - }, - { - .Opcode = RC_OPCODE_ELSE, - .Name = "ELSE", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_ENDIF, - .Name = "ENDIF", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_BGNLOOP, - .Name = "BGNLOOP", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_BRK, - .Name = "BRK", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_ENDLOOP, - .Name = "ENDLOOP", - .IsFlowControl = 1, - .NumSrcRegs = 0, - }, - { - .Opcode = RC_OPCODE_CONT, - .Name = "CONT", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = 
RC_OPCODE_REPL_ALPHA, - .Name = "REPL_ALPHA", - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_BEGIN_TEX, - .Name = "BEGIN_TEX" - }, - { - .Opcode = RC_OPCODE_KILP, - .Name = "KILP", - }, - { - .Opcode = RC_ME_PRED_SEQ, - .Name = "ME_PRED_SEQ", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SGT, - .Name = "ME_PRED_SGT", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SGE, - .Name = "ME_PRED_SGE", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SNEQ, - .Name = "ME_PRED_SNEQ", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SET_CLR, - .Name = "ME_PRED_SET_CLEAR", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SET_INV, - .Name = "ME_PRED_SET_INV", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SET_POP, - .Name = "ME_PRED_SET_POP", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_ME_PRED_SET_RESTORE, - .Name = "ME_PRED_SET_RESTORE", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_VE_PRED_SEQ_PUSH, - .Name = "VE_PRED_SEQ_PUSH", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_VE_PRED_SGT_PUSH, - .Name = "VE_PRED_SGT_PUSH", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_VE_PRED_SGE_PUSH, - .Name = "VE_PRED_SGE_PUSH", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_VE_PRED_SNEQ_PUSH, - .Name = "VE_PRED_SNEQ_PUSH", - .NumSrcRegs = 2, - .HasDstReg = 1 - } -}; + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP", + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE", + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_ARR, + .Name = "ARR", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + 
.Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1, + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = 
"MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_ROUND, + .Name = "ROUND", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1, + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1, + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1, + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + 
.IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1, + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX", + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", + }, + { + .Opcode = RC_ME_PRED_SEQ, + .Name = "ME_PRED_SEQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SGT, + .Name = "ME_PRED_SGT", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SGE, + .Name = "ME_PRED_SGE", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SNEQ, + .Name = "ME_PRED_SNEQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SET_CLR, + .Name = "ME_PRED_SET_CLEAR", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SET_INV, + .Name = "ME_PRED_SET_INV", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SET_POP, + .Name = "ME_PRED_SET_POP", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_ME_PRED_SET_RESTORE, + .Name = "ME_PRED_SET_RESTORE", + .NumSrcRegs = 1, + .HasDstReg = 1, + }, + { + .Opcode = RC_VE_PRED_SEQ_PUSH, + .Name = "VE_PRED_SEQ_PUSH", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_VE_PRED_SGT_PUSH, + .Name = "VE_PRED_SGT_PUSH", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_VE_PRED_SGE_PUSH, + .Name = "VE_PRED_SGE_PUSH", + .NumSrcRegs = 2, + .HasDstReg = 1, + }, + { + .Opcode = RC_VE_PRED_SNEQ_PUSH, + .Name = "VE_PRED_SNEQ_PUSH", + .NumSrcRegs = 2, + .HasDstReg = 1, + }}; -void rc_compute_sources_for_writemask( - const struct rc_instruction *inst, - unsigned int writemask, - unsigned int *srcmasks) +void +rc_compute_sources_for_writemask(const struct 
rc_instruction *inst, unsigned int writemask, + unsigned int *srcmasks) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - srcmasks[0] = 0; - srcmasks[1] = 0; - srcmasks[2] = 0; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; - if (opcode->Opcode == RC_OPCODE_KIL) - srcmasks[0] |= RC_MASK_XYZW; - else if (opcode->Opcode == RC_OPCODE_IF) - srcmasks[0] |= RC_MASK_X; + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; - if (!writemask) - return; + if (!writemask) + return; - if (opcode->IsComponentwise) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= writemask; - } else if (opcode->IsStandardScalar) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= writemask; - } else { - switch(opcode->Opcode) { - case RC_OPCODE_ARL: - case RC_OPCODE_ARR: - srcmasks[0] |= RC_MASK_X; - break; - case RC_OPCODE_DP2: - srcmasks[0] |= RC_MASK_XY; - srcmasks[1] |= RC_MASK_XY; - break; - case RC_OPCODE_DP3: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZ; - break; - case RC_OPCODE_DP4: - srcmasks[0] |= RC_MASK_XYZW; - srcmasks[1] |= RC_MASK_XYZW; - break; - case RC_OPCODE_TXB: - case RC_OPCODE_TXP: - case RC_OPCODE_TXL: - srcmasks[0] |= RC_MASK_W; - FALLTHROUGH; - case RC_OPCODE_TEX: - switch (inst->U.I.TexSrcTarget) { - case RC_TEXTURE_1D: - srcmasks[0] |= RC_MASK_X; - break; - case RC_TEXTURE_2D: - case RC_TEXTURE_RECT: - case RC_TEXTURE_1D_ARRAY: - srcmasks[0] |= RC_MASK_XY; - break; - case RC_TEXTURE_3D: - case RC_TEXTURE_CUBE: - case RC_TEXTURE_2D_ARRAY: - srcmasks[0] |= RC_MASK_XYZ; - break; - } - break; - case RC_OPCODE_TXD: - switch (inst->U.I.TexSrcTarget) { - case RC_TEXTURE_1D_ARRAY: - srcmasks[0] |= RC_MASK_Y; - FALLTHROUGH; - case RC_TEXTURE_1D: - srcmasks[0] |= RC_MASK_X; - srcmasks[1] |= 
RC_MASK_X; - srcmasks[2] |= RC_MASK_X; - break; - case RC_TEXTURE_2D_ARRAY: - srcmasks[0] |= RC_MASK_Z; - FALLTHROUGH; - case RC_TEXTURE_2D: - case RC_TEXTURE_RECT: - srcmasks[0] |= RC_MASK_XY; - srcmasks[1] |= RC_MASK_XY; - srcmasks[2] |= RC_MASK_XY; - break; - case RC_TEXTURE_3D: - case RC_TEXTURE_CUBE: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZ; - srcmasks[2] |= RC_MASK_XYZ; - break; - } - break; - case RC_OPCODE_DST: - srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; - srcmasks[1] |= RC_MASK_Y | RC_MASK_W; - break; - case RC_OPCODE_EXP: - case RC_OPCODE_LOG: - srcmasks[0] |= RC_MASK_XY; - break; - case RC_OPCODE_LIT: - srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; - break; - default: - break; - } - } + if (opcode->IsComponentwise) { + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else { + switch (opcode->Opcode) { + case RC_OPCODE_ARL: + case RC_OPCODE_ARR: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + case RC_OPCODE_TXL: + srcmasks[0] |= RC_MASK_W; + FALLTHROUGH; + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + FALLTHROUGH; + case RC_TEXTURE_1D: + 
srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + FALLTHROUGH; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h index c9551fe8858..11509837dbb 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -12,210 +12,208 @@ * Opcodes understood by the Radeon compiler. */ typedef enum { - RC_OPCODE_NOP = 0, - RC_OPCODE_ILLEGAL_OPCODE, + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, - /** vec4 instruction: dst.c = src0.c + src1.c; */ - RC_OPCODE_ADD, + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, - /** special instruction: load address register - * dst.x = floor(src.x), where dst must be an address register */ - RC_OPCODE_ARL, + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, - /** special instruction: load address register with round - * dst.x = round(src.x), where dst must be an address register */ - RC_OPCODE_ARR, + /** special instruction: load address register with round + * dst.x = round(src.x), where dst must be an address register */ + RC_OPCODE_ARR, - /** vec4 instruction: dst.c = src0.c < 0.0 ? 
src1.c : src2.c */ - RC_OPCODE_CMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, - /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ - RC_OPCODE_CND, + /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ + RC_OPCODE_CND, - /** scalar instruction: dst = cos(src0.x) */ - RC_OPCODE_COS, + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, - /** special instruction: take vec4 partial derivative in X direction - * dst.c = d src0.c / dx */ - RC_OPCODE_DDX, + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, - /** special instruction: take vec4 partial derivative in Y direction - * dst.c = d src0.c / dy */ - RC_OPCODE_DDY, + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ - RC_OPCODE_DP2, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ - RC_OPCODE_DP3, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ - RC_OPCODE_DP4, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, - /** special instruction, see ARB_fragment_program */ - RC_OPCODE_DST, + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, - /** scalar instruction: dst = 2**src0.x */ - RC_OPCODE_EX2, + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_EXP, + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, - /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ - RC_OPCODE_FRC, + /** vec4 instruction: dst.c = src0.c - 
floor(src0.c) */ + RC_OPCODE_FRC, - /** special instruction: stop execution if any component of src0 is negative */ - RC_OPCODE_KIL, + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, - /** scalar instruction: dst = log_2(src0.x) */ - RC_OPCODE_LG2, + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_LIT, + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_LOG, + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, - /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ - RC_OPCODE_MAD, + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, - /** vec4 instruction: dst.c = max(src0.c, src1.c) */ - RC_OPCODE_MAX, + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, - /** vec4 instruction: dst.c = min(src0.c, src1.c) */ - RC_OPCODE_MIN, + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, - /** vec4 instruction: dst.c = src0.c */ - RC_OPCODE_MOV, + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, - /** vec4 instruction: dst.c = src0.c*src1.c */ - RC_OPCODE_MUL, + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, - /** scalar instruction: dst = src0.x ** src1.x */ - RC_OPCODE_POW, + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, - /** scalar instruction: dst = 1 / src0.x */ - RC_OPCODE_RCP, + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, - /** vec4 instruction: dst.c = floor(src0.c + 0.5) */ - RC_OPCODE_ROUND, + /** vec4 instruction: dst.c = floor(src0.c + 0.5) */ + RC_OPCODE_ROUND, - /** scalar instruction: dst = 1 / sqrt(src0.x) */ - RC_OPCODE_RSQ, + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, - /** vec4 instruction: dst.c = (src0.c == src1.c) ? 
1.0 : 0.0 */ - RC_OPCODE_SEQ, + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, - /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SGE, + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, - /** scalar instruction: dst = sin(src0.x) */ - RC_OPCODE_SIN, + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, - /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SLT, + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, - /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SNE, + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, - RC_OPCODE_TEX, - RC_OPCODE_TXB, - RC_OPCODE_TXD, - RC_OPCODE_TXL, - RC_OPCODE_TXP, + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, - /** branch instruction: - * If src0.x != 0.0, continue with the next instruction; - * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. - */ - RC_OPCODE_IF, + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. 
+ */ + RC_OPCODE_IF, - /** branch instruction: jump to matching RC_OPCODE_ENDIF */ - RC_OPCODE_ELSE, + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, - /** branch instruction: has no effect */ - RC_OPCODE_ENDIF, - - RC_OPCODE_BGNLOOP, + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, - RC_OPCODE_BRK, + RC_OPCODE_BGNLOOP, - RC_OPCODE_ENDLOOP, + RC_OPCODE_BRK, - RC_OPCODE_CONT, + RC_OPCODE_ENDLOOP, - /** special instruction, used in R300-R500 fragment program pair instructions - * indicates that the result of the alpha operation shall be replicated - * across all other channels */ - RC_OPCODE_REPL_ALPHA, + RC_OPCODE_CONT, - /** special instruction, used in R300-R500 fragment programs - * to indicate the start of a block of texture instructions that - * can run simultaneously. */ - RC_OPCODE_BEGIN_TEX, + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, - /** Stop execution of the shader (GLSL discard) */ - RC_OPCODE_KILP, + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. 
*/ + RC_OPCODE_BEGIN_TEX, - /* Vertex shader CF Instructions */ - RC_ME_PRED_SEQ, - RC_ME_PRED_SGT, - RC_ME_PRED_SGE, - RC_ME_PRED_SNEQ, - RC_ME_PRED_SET_CLR, - RC_ME_PRED_SET_INV, - RC_ME_PRED_SET_POP, - RC_ME_PRED_SET_RESTORE, + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, - RC_VE_PRED_SEQ_PUSH, - RC_VE_PRED_SGT_PUSH, - RC_VE_PRED_SGE_PUSH, - RC_VE_PRED_SNEQ_PUSH, + /* Vertex shader CF Instructions */ + RC_ME_PRED_SEQ, + RC_ME_PRED_SGT, + RC_ME_PRED_SGE, + RC_ME_PRED_SNEQ, + RC_ME_PRED_SET_CLR, + RC_ME_PRED_SET_INV, + RC_ME_PRED_SET_POP, + RC_ME_PRED_SET_RESTORE, - MAX_RC_OPCODE + RC_VE_PRED_SEQ_PUSH, + RC_VE_PRED_SGT_PUSH, + RC_VE_PRED_SGE_PUSH, + RC_VE_PRED_SNEQ_PUSH, + + MAX_RC_OPCODE } rc_opcode; - struct rc_opcode_info { - rc_opcode Opcode; - const char * Name; + rc_opcode Opcode; + const char *Name; - /** true if the instruction reads from a texture. - * - * \note This is false for the KIL instruction, even though KIL is - * a texture instruction from a hardware point of view. */ - unsigned int HasTexture:1; + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. 
*/ + unsigned int HasTexture : 1; - unsigned int NumSrcRegs:2; - unsigned int HasDstReg:1; + unsigned int NumSrcRegs : 2; + unsigned int HasDstReg : 1; - /** true if this instruction affects control flow */ - unsigned int IsFlowControl:1; + /** true if this instruction affects control flow */ + unsigned int IsFlowControl : 1; - /** true if this is a vector instruction that operates on components in parallel - * without any cross-component interaction */ - unsigned int IsComponentwise:1; + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise : 1; - /** true if this instruction sources only its operands X components - * to compute one result which is smeared across all output channels */ - unsigned int IsStandardScalar:1; + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar : 1; }; extern const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; -static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +static inline const struct rc_opcode_info * +rc_get_opcode_info(rc_opcode opcode) { - assert((unsigned int)opcode < MAX_RC_OPCODE); - assert(rc_opcodes[opcode].Opcode == opcode); + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); - return &rc_opcodes[opcode]; + return &rc_opcodes[opcode]; } struct rc_instruction; -void rc_compute_sources_for_writemask( - const struct rc_instruction *inst, - unsigned int writemask, - unsigned int *srcmasks); +void rc_compute_sources_for_writemask(const struct rc_instruction *inst, unsigned int writemask, + unsigned int *srcmasks); #endif /* RADEON_OPCODES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 38c6355ca1a..cde24044f46 100644 --- 
a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -15,532 +15,496 @@ #include "radeon_variable.h" struct src_clobbered_reads_cb_data { - rc_register_file File; - unsigned int Index; - unsigned int Mask; - struct rc_reader_data * ReaderData; + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data *ReaderData; }; -typedef void (*rc_presub_replace_fn)(struct rc_instruction *, - struct rc_instruction *, - unsigned int); +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, struct rc_instruction *, + unsigned int); -static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +static struct rc_src_register +chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) { - struct rc_src_register combine; - combine.File = inner.File; - combine.Index = inner.Index; - combine.RelAddr = inner.RelAddr; - if (outer.Abs) { - combine.Abs = 1; - combine.Negate = outer.Negate; - } else { - combine.Abs = inner.Abs; - combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); - combine.Negate ^= outer.Negate; - } - combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); - return combine; + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; } -static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, - struct rc_src_register * src) +static void +copy_propagate_scan_read(void *data, struct rc_instruction *inst, struct rc_src_register *src) { - rc_register_file file = src->File; - struct rc_reader_data * reader_data = data; + 
rc_register_file file = src->File; + struct rc_reader_data *reader_data = data; - if(!rc_inst_can_use_presub(reader_data->C, - inst, - reader_data->Writer->U.I.PreSub.Opcode, - rc_swizzle_to_writemask(src->Swizzle), - src, - &reader_data->Writer->U.I.PreSub.SrcReg[0], - &reader_data->Writer->U.I.PreSub.SrcReg[1])) { - reader_data->Abort = 1; - return; - } + if (!rc_inst_can_use_presub(reader_data->C, inst, reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } - /* XXX This could probably be handled better. */ - if (file == RC_FILE_ADDRESS) { - reader_data->Abort = 1; - return; - } + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } - /* R300/R400 is unhappy about propagating - * 0: MOV temp[1], -none.1111; - * 1: KIL temp[1]; - * to - * 0: KIL -none.1111; - * - * R500 is fine with it. - */ - if (!reader_data->C->is_r500 && inst->U.I.Opcode == RC_OPCODE_KIL && - reader_data->Writer->U.I.SrcReg[0].File == RC_FILE_NONE) { - reader_data->Abort = 1; - return; - } + /* R300/R400 is unhappy about propagating + * 0: MOV temp[1], -none.1111; + * 1: KIL temp[1]; + * to + * 0: KIL -none.1111; + * + * R500 is fine with it. + */ + if (!reader_data->C->is_r500 && inst->U.I.Opcode == RC_OPCODE_KIL && + reader_data->Writer->U.I.SrcReg[0].File == RC_FILE_NONE) { + reader_data->Abort = 1; + return; + } - /* These instructions cannot read from the constants file. 
- * see radeonTransformTEX() - */ - if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && - reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && - reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_NONE && - (inst->U.I.Opcode == RC_OPCODE_TEX || - inst->U.I.Opcode == RC_OPCODE_TXB || - inst->U.I.Opcode == RC_OPCODE_TXP || - inst->U.I.Opcode == RC_OPCODE_TXD || - inst->U.I.Opcode == RC_OPCODE_TXL || - inst->U.I.Opcode == RC_OPCODE_KIL)){ - reader_data->Abort = 1; - return; - } + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if (reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_NONE && + (inst->U.I.Opcode == RC_OPCODE_TEX || inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || inst->U.I.Opcode == RC_OPCODE_KIL)) { + reader_data->Abort = 1; + return; + } } -static void src_clobbered_reads_cb( - void * data, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +src_clobbered_reads_cb(void *data, struct rc_instruction *inst, struct rc_src_register *src) { - struct src_clobbered_reads_cb_data * sc_data = data; + struct src_clobbered_reads_cb_data *sc_data = data; - if (src->File == sc_data->File - && src->Index == sc_data->Index - && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + if (src->File == sc_data->File && src->Index == sc_data->Index && + (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; - } + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } - if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { - sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; - } + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } } 
-static void is_src_clobbered_scan_write( - void * data, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) +static void +is_src_clobbered_scan_write(void *data, struct rc_instruction *inst, rc_register_file file, + unsigned int index, unsigned int mask) { - struct src_clobbered_reads_cb_data sc_data; - struct rc_reader_data * reader_data = data; - sc_data.File = file; - sc_data.Index = index; - sc_data.Mask = mask; - sc_data.ReaderData = reader_data; - rc_for_all_reads_src(reader_data->Writer, - src_clobbered_reads_cb, &sc_data); + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data *reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, src_clobbered_reads_cb, &sc_data); } -static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +static void +copy_propagate(struct radeon_compiler *c, struct rc_instruction *inst_mov) { - struct rc_reader_data reader_data; - unsigned int i; + struct rc_reader_data reader_data; + unsigned int i; - if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.WriteALUResult) - return; + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; - /* Get a list of all the readers of this MOV instruction. */ - reader_data.ExitOnAbort = 1; - rc_get_readers(c, inst_mov, &reader_data, - copy_propagate_scan_read, NULL, - is_src_clobbered_scan_write); + /* Get a list of all the readers of this MOV instruction. 
*/ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); - if (reader_data.Abort || reader_data.ReaderCount == 0) - return; + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; - /* We can propagate SaturateMode if all the readers are MOV instructions - * without a presubtract operation, source negation and absolute. - * In that case, we just move SaturateMode to all readers. */ - if (inst_mov->U.I.SaturateMode) { - for (i = 0; i < reader_data.ReaderCount; i++) { - struct rc_instruction * inst = reader_data.Readers[i].Inst; + /* We can propagate SaturateMode if all the readers are MOV instructions + * without a presubtract operation, source negation and absolute. + * In that case, we just move SaturateMode to all readers. */ + if (inst_mov->U.I.SaturateMode) { + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction *inst = reader_data.Readers[i].Inst; - if (inst->U.I.Opcode != RC_OPCODE_MOV || - inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || - inst->U.I.SrcReg[0].Abs || - inst->U.I.SrcReg[0].Negate) { - return; - } - } - } + if (inst->U.I.Opcode != RC_OPCODE_MOV || inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || + inst->U.I.SrcReg[0].Abs || inst->U.I.SrcReg[0].Negate) { + return; + } + } + } - /* Propagate the MOV instruction. */ - for (i = 0; i < reader_data.ReaderCount; i++) { - struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + /* Propagate the MOV instruction. 
*/ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction *inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = + chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); - if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = inst_mov->U.I.PreSub; - if (!inst->U.I.SaturateMode) - inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; - } + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + if (!inst->U.I.SaturateMode) + inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; + } - /* Finally, remove the original MOV instruction */ - rc_remove_instruction(inst_mov); + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); } /** * Check if a source register is actually always the same * swizzle constant. */ -static int is_src_uniform_constant(struct rc_src_register src, - rc_swizzle * pswz, unsigned int * pnegate) +static int +is_src_uniform_constant(struct rc_src_register src, rc_swizzle *pswz, unsigned int *pnegate) { - int have_used = 0; + int have_used = 0; - if (src.File != RC_FILE_NONE) { - *pswz = 0; - return 0; - } + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(src.Swizzle, chan); - if (swz < 4) { - *pswz = 0; - return 0; - } - if (swz == RC_SWIZZLE_UNUSED) - continue; + for (unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; - if (!have_used) { - *pswz = swz; - *pnegate = GET_BIT(src.Negate, chan); - have_used = 1; - } else { - if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { - *pswz = 0; - return 0; - } - } - } + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz 
= 0; + return 0; + } + } + } - return 1; + return 1; } /** * Replace 0.0, 1.0 and 0.5 immediate constants by their * respective swizzles. Simplify instructions like ADD dst, src, 0; */ -static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +static void +constant_folding(struct radeon_compiler *c, struct rc_instruction *inst) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int i; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; - /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - struct rc_constant * constant; - struct rc_src_register newsrc; - int have_real_reference; - unsigned int chan; + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant *constant; + struct rc_src_register newsrc; + int have_real_reference; + unsigned int chan; - /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ - for (chan = 0; chan < 4; ++chan) - if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) - break; - if (chan == 4) { - inst->U.I.SrcReg[src].File = RC_FILE_NONE; - continue; - } + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } - /* Convert immediates to swizzles. */ - if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || - inst->U.I.SrcReg[src].RelAddr || - inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) - continue; + /* Convert immediates to swizzles. 
*/ + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; - constant = - &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + constant = &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; - if (constant->Type != RC_CONSTANT_IMMEDIATE) - continue; + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; - newsrc = inst->U.I.SrcReg[src]; - have_real_reference = 0; - for (chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); - unsigned int newswz; - float imm; - float baseimm; + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; - if (swz >= 4) - continue; + if (swz >= 4) + continue; - imm = constant->u.Immediate[swz]; - baseimm = imm; - if (imm < 0.0) - baseimm = -baseimm; + imm = constant->u.Immediate[swz]; + baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; - if (baseimm == 0.0) { - newswz = RC_SWIZZLE_ZERO; - } else if (baseimm == 1.0) { - newswz = RC_SWIZZLE_ONE; - } else if (baseimm == 0.5 && c->has_half_swizzles) { - newswz = RC_SWIZZLE_HALF; - } else { - have_real_reference = 1; - continue; - } + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5 && c->has_half_swizzles) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } - SET_SWZ(newsrc.Swizzle, chan, newswz); - if (imm < 0.0 && !newsrc.Abs) - newsrc.Negate ^= 1 << chan; - } + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } - if (!have_real_reference) { - newsrc.File = RC_FILE_NONE; - newsrc.Index = 0; - } + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } - /* don't make the swizzle worse */ - if 
(!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc)) - continue; + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc)) + continue; - inst->U.I.SrcReg[src] = newsrc; - } + inst->U.I.SrcReg[src] = newsrc; + } - /* In case this instruction has been converted, make sure all of the - * registers that are no longer used are empty. */ - opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(i = opcode->NumSrcRegs; i < 3; i++) { - memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); - } + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } } /** * If src and dst use the same register, this function returns a writemask that * indicates which components are read by src. Otherwise zero is returned. */ -static unsigned int src_reads_dst_mask(struct rc_src_register src, - struct rc_dst_register dst) +static unsigned int +src_reads_dst_mask(struct rc_src_register src, struct rc_dst_register dst) { - if (dst.File != src.File || dst.Index != src.Index) { - return 0; - } - return rc_swizzle_to_writemask(src.Swizzle); + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + return rc_swizzle_to_writemask(src.Swizzle); } /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) * in any of its channels. Return 0 otherwise. 
*/ -static int src_has_const_swz(struct rc_src_register src) { - int chan; - for(chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(src.Swizzle, chan); - if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF - || swz == RC_SWIZZLE_ONE) { - return 1; - } - } - return 0; +static int +src_has_const_swz(struct rc_src_register src) +{ + int chan; + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; } -static void presub_scan_read( - void * data, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +presub_scan_read(void *data, struct rc_instruction *inst, struct rc_src_register *src) { - struct rc_reader_data * reader_data = data; - rc_presubtract_op * presub_opcode = reader_data->CbData; + struct rc_reader_data *reader_data = data; + rc_presubtract_op *presub_opcode = reader_data->CbData; - if (!rc_inst_can_use_presub(reader_data->C, - inst, - *presub_opcode, - reader_data->Writer->U.I.DstReg.WriteMask, - src, - &reader_data->Writer->U.I.SrcReg[0], - &reader_data->Writer->U.I.SrcReg[1])) { - reader_data->Abort = 1; - return; - } + if (!rc_inst_can_use_presub( + reader_data->C, inst, *presub_opcode, reader_data->Writer->U.I.DstReg.WriteMask, src, + &reader_data->Writer->U.I.SrcReg[0], &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } } -static int presub_helper( - struct radeon_compiler * c, - struct rc_instruction * inst_add, - rc_presubtract_op presub_opcode, - rc_presub_replace_fn presub_replace) +static int +presub_helper(struct radeon_compiler *c, struct rc_instruction *inst_add, + rc_presubtract_op presub_opcode, rc_presub_replace_fn presub_replace) { - struct rc_reader_data reader_data; - unsigned int i; - rc_presubtract_op cb_op = presub_opcode; + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; - 
reader_data.CbData = &cb_op; - reader_data.ExitOnAbort = 1; - rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, - is_src_clobbered_scan_write); + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); - if (reader_data.Abort || reader_data.ReaderCount == 0) - return 0; + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; - for(i = 0; i < reader_data.ReaderCount; i++) { - unsigned int src_index; - struct rc_reader reader = reader_data.Readers[i]; - const struct rc_opcode_info * info = - rc_get_opcode_info(reader.Inst->U.I.Opcode); + for (i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info *info = rc_get_opcode_info(reader.Inst->U.I.Opcode); - for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) - presub_replace(inst_add, reader.Inst, src_index); - } - } - return 1; + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; } -static void presub_replace_add( - struct rc_instruction * inst_add, - struct rc_instruction * inst_reader, - unsigned int src_index) +static void +presub_replace_add(struct rc_instruction *inst_add, struct rc_instruction *inst_reader, + unsigned int src_index) { - rc_presubtract_op presub_opcode; + rc_presubtract_op presub_opcode; - unsigned int negates = 0; - if (inst_add->U.I.SrcReg[0].Negate) - negates++; - if (inst_add->U.I.SrcReg[1].Negate) - negates++; - assert(negates != 2 || inst_add->U.I.SrcReg[1].Negate == inst_add->U.I.SrcReg[0].Negate); + unsigned int negates = 0; + if (inst_add->U.I.SrcReg[0].Negate) + negates++; + if (inst_add->U.I.SrcReg[1].Negate) + negates++; + assert(negates != 2 || 
inst_add->U.I.SrcReg[1].Negate == inst_add->U.I.SrcReg[0].Negate); - if (negates == 1) - presub_opcode = RC_PRESUB_SUB; - else - presub_opcode = RC_PRESUB_ADD; + if (negates == 1) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; - if (inst_add->U.I.SrcReg[1].Negate && negates == 1) { - inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; - } else { - inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; - inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; - } - /* If both sources are negative we can move the negate to the presub. */ - unsigned negate_mask = negates == 1 ? 0 : inst_add->U.I.SrcReg[0].Negate; - inst_reader->U.I.PreSub.SrcReg[0].Negate = negate_mask; - inst_reader->U.I.PreSub.SrcReg[1].Negate = negate_mask; - inst_reader->U.I.PreSub.Opcode = presub_opcode; - inst_reader->U.I.SrcReg[src_index] = - chain_srcregs(inst_reader->U.I.SrcReg[src_index], - inst_reader->U.I.PreSub.SrcReg[0]); - inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate && negates == 1) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + /* If both sources are negative we can move the negate to the presub. */ + unsigned negate_mask = negates == 1 ? 
0 : inst_add->U.I.SrcReg[0].Negate; + inst_reader->U.I.PreSub.SrcReg[0].Negate = negate_mask; + inst_reader->U.I.PreSub.SrcReg[1].Negate = negate_mask; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } -static int is_presub_candidate( - struct radeon_compiler * c, - struct rc_instruction * inst) +static int +is_presub_candidate(struct radeon_compiler *c, struct rc_instruction *inst) { - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int i; - unsigned int is_constant[2] = {0, 0}; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; - assert(inst->U.I.Opcode == RC_OPCODE_ADD || inst->U.I.Opcode == RC_OPCODE_MAD); + assert(inst->U.I.Opcode == RC_OPCODE_ADD || inst->U.I.Opcode == RC_OPCODE_MAD); - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE - || inst->U.I.SaturateMode - || inst->U.I.WriteALUResult - || inst->U.I.Omod) { - return 0; - } + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode || + inst->U.I.WriteALUResult || inst->U.I.Omod) { + return 0; + } - /* If first two sources use a constant swizzle, then we can't convert it to - * a presubtract operation. In fact for the ADD and SUB presubtract - * operations neither source can contain a constant swizzle. This - * specific case is checked in peephole_add_presub_add() when - * we make sure the swizzles for both sources are equal, so we - * don't need to worry about it here. 
*/ - for (i = 0; i < 2; i++) { - int chan; - for (chan = 0; chan < 4; chan++) { - rc_swizzle swz = - get_swz(inst->U.I.SrcReg[i].Swizzle, chan); - if (swz == RC_SWIZZLE_ONE - || swz == RC_SWIZZLE_ZERO - || swz == RC_SWIZZLE_HALF) { - is_constant[i] = 1; - } - } - } - if (is_constant[0] && is_constant[1]) - return 0; + /* If first two sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE || swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; - for(i = 0; i < info->NumSrcRegs; i++) { - struct rc_src_register src = inst->U.I.SrcReg[i]; - if (src_reads_dst_mask(src, inst->U.I.DstReg)) - return 0; + for (i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; - src.File = RC_FILE_PRESUB; - if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) - return 0; - } - return 1; + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; } -static int peephole_add_presub_add( - struct radeon_compiler * c, - struct rc_instruction * inst_add) +static int +peephole_add_presub_add(struct radeon_compiler *c, struct rc_instruction *inst_add) { - unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; - unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned 
src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; - if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) - return 0; + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; - /* src0 and src1 can't have absolute values */ - if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* if src0 is negative, at least all bits of dstmask have to be set */ - if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) - return 0; + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - /* if src1 is negative, at least all bits of dstmask have to be set */ - if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) - return 0; + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; - if (!is_presub_candidate(c, inst_add)) - return 0; + if (!is_presub_candidate(c, inst_add)) + return 0; - if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { - rc_remove_instruction(inst_add); - return 1; - } - return 0; + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; } -static void presub_replace_inv( - struct rc_instruction * inst_add, - struct rc_instruction * inst_reader, - unsigned int src_index) +static void +presub_replace_inv(struct rc_instruction *inst_add, struct rc_instruction *inst_reader, + unsigned int src_index) { - /* We must be careful not to modify inst_add, since it - * is possible it will remain part of the program.*/ - inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; - 
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], - inst_reader->U.I.PreSub.SrcReg[0]); + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], inst_reader->U.I.PreSub.SrcReg[0]); - inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; } -static void presub_replace_bias( - struct rc_instruction * inst_mad, - struct rc_instruction * inst_reader, - unsigned int src_index) +static void +presub_replace_bias(struct rc_instruction *inst_mad, struct rc_instruction *inst_reader, + unsigned int src_index) { - /* We must be careful not to modify inst_mad, since it - * is possible it will remain part of the program.*/ - inst_reader->U.I.PreSub.SrcReg[0] = inst_mad->U.I.SrcReg[0]; - inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; - inst_reader->U.I.PreSub.Opcode = RC_PRESUB_BIAS; - inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], - inst_reader->U.I.PreSub.SrcReg[0]); + /* We must be careful not to modify inst_mad, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_mad->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_BIAS; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], inst_reader->U.I.PreSub.SrcReg[0]); - inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst_reader->U.I.SrcReg[src_index].Index = 
RC_PRESUB_BIAS; + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_BIAS; } /** @@ -553,41 +517,39 @@ static void presub_replace_bias( * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ -static int peephole_add_presub_inv( - struct radeon_compiler * c, - struct rc_instruction * inst_add) +static int +peephole_add_presub_inv(struct radeon_compiler *c, struct rc_instruction *inst_add) { - unsigned int i, swz; + unsigned int i, swz; - if (!is_presub_candidate(c, inst_add)) - return 0; + if (!is_presub_candidate(c, inst_add)) + return 0; - /* Check if src0 is 1. */ - /* XXX It would be nice to use is_src_uniform_constant here, but that - * function only works if the register's file is RC_FILE_NONE */ - for(i = 0; i < 4; i++ ) { - if (!(inst_add->U.I.DstReg.WriteMask & (1 << i))) - continue; + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for (i = 0; i < 4; i++) { + if (!(inst_add->U.I.DstReg.WriteMask & (1 << i))) + continue; - swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); - if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i)) - return 0; - } + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i)) + return 0; + } - /* Check src1. */ - if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != - inst_add->U.I.DstReg.WriteMask - || inst_add->U.I.SrcReg[1].Abs - || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + /* Check src1. 
*/ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask || + inst_add->U.I.SrcReg[1].Abs || src_has_const_swz(inst_add->U.I.SrcReg[1])) { - return 0; - } + return 0; + } - if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { - rc_remove_instruction(inst_add); - return 1; - } - return 0; + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; } /** @@ -600,275 +562,258 @@ static int peephole_add_presub_inv( * 0 if the MAD instruction is still part of the program. * 1 if the MAD instruction is no longer part of the program. */ -static int peephole_mad_presub_bias( - struct radeon_compiler * c, - struct rc_instruction * inst_mad) +static int +peephole_mad_presub_bias(struct radeon_compiler *c, struct rc_instruction *inst_mad) { - unsigned int i, swz; + unsigned int i, swz; - if (!is_presub_candidate(c, inst_mad)) - return 0; + if (!is_presub_candidate(c, inst_mad)) + return 0; - /* Check if src2 is 1. */ - for(i = 0; i < 4; i++ ) { - if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) - continue; + /* Check if src2 is 1. */ + for (i = 0; i < 4; i++) { + if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) + continue; - swz = GET_SWZ(inst_mad->U.I.SrcReg[2].Swizzle, i); - if (swz != RC_SWIZZLE_ONE || inst_mad->U.I.SrcReg[2].Negate & (1 << i)) - return 0; - } + swz = GET_SWZ(inst_mad->U.I.SrcReg[2].Swizzle, i); + if (swz != RC_SWIZZLE_ONE || inst_mad->U.I.SrcReg[2].Negate & (1 << i)) + return 0; + } - /* Check if src1 is 2. */ - struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1]; - if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs) - return 0; - if (src1_reg.File == RC_FILE_INLINE) { - if (rc_inline_to_float(src1_reg.Index) != 2.0f) - return 0; - } else { - if (src1_reg.File != RC_FILE_CONSTANT) - return 0; + /* Check if src1 is 2. 
*/ + struct rc_src_register src1_reg = inst_mad->U.I.SrcReg[1]; + if ((src1_reg.Negate & inst_mad->U.I.DstReg.WriteMask) != 0 || src1_reg.Abs) + return 0; + if (src1_reg.File == RC_FILE_INLINE) { + if (rc_inline_to_float(src1_reg.Index) != 2.0f) + return 0; + } else { + if (src1_reg.File != RC_FILE_CONSTANT) + return 0; - struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index]; - if (constant->Type != RC_CONSTANT_IMMEDIATE) - return 0; - for (i = 0; i < 4; i++) { - if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) - continue; - swz = GET_SWZ(src1_reg.Swizzle, i); - if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0) - return 0; - } - } + struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index]; + if (constant->Type != RC_CONSTANT_IMMEDIATE) + return 0; + for (i = 0; i < 4; i++) { + if (!(inst_mad->U.I.DstReg.WriteMask & (1 << i))) + continue; + swz = GET_SWZ(src1_reg.Swizzle, i); + if (swz >= RC_SWIZZLE_ZERO || constant->u.Immediate[swz] != 2.0) + return 0; + } + } - /* Check src0. */ - if ((inst_mad->U.I.SrcReg[0].Negate & inst_mad->U.I.DstReg.WriteMask) != - inst_mad->U.I.DstReg.WriteMask - || inst_mad->U.I.SrcReg[0].Abs - || src_has_const_swz(inst_mad->U.I.SrcReg[0])) { + /* Check src0. 
*/ + if ((inst_mad->U.I.SrcReg[0].Negate & inst_mad->U.I.DstReg.WriteMask) != + inst_mad->U.I.DstReg.WriteMask || + inst_mad->U.I.SrcReg[0].Abs || src_has_const_swz(inst_mad->U.I.SrcReg[0])) { - return 0; - } + return 0; + } - if (presub_helper(c, inst_mad, RC_PRESUB_BIAS, presub_replace_bias)) { - rc_remove_instruction(inst_mad); - return 1; - } - return 0; + if (presub_helper(c, inst_mad, RC_PRESUB_BIAS, presub_replace_bias)) { + rc_remove_instruction(inst_mad); + return 1; + } + return 0; } struct peephole_mul_cb_data { - struct rc_dst_register * Writer; - unsigned int Clobbered; + struct rc_dst_register *Writer; + unsigned int Clobbered; }; -static void omod_filter_reader_cb( - void * userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) +static void +omod_filter_reader_cb(void *userdata, struct rc_instruction *inst, rc_register_file file, + unsigned int index, unsigned int mask) { - struct peephole_mul_cb_data * d = userdata; - if (rc_src_reads_dst_mask(file, mask, index, - d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { + struct peephole_mul_cb_data *d = userdata; + if (rc_src_reads_dst_mask(file, mask, index, d->Writer->File, d->Writer->Index, + d->Writer->WriteMask)) { - d->Clobbered = 1; - } + d->Clobbered = 1; + } } -static void omod_filter_writer_cb( - void * userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) +static void +omod_filter_writer_cb(void *userdata, struct rc_instruction *inst, rc_register_file file, + unsigned int index, unsigned int mask) { - struct peephole_mul_cb_data * d = userdata; - if (file == d->Writer->File && index == d->Writer->Index && - (mask & d->Writer->WriteMask)) { - d->Clobbered = 1; - } + struct peephole_mul_cb_data *d = userdata; + if (file == d->Writer->File && index == d->Writer->Index && (mask & d->Writer->WriteMask)) { + d->Clobbered = 1; + } } -static int peephole_mul_omod( - struct 
radeon_compiler * c, - struct rc_instruction * inst_mul, - struct rc_list * var_list) +static int +peephole_mul_omod(struct radeon_compiler *c, struct rc_instruction *inst_mul, + struct rc_list *var_list) { - unsigned int chan = 0, swz, i; - int const_index = -1; - int temp_index = -1; - float const_value; - rc_omod_op omod_op = RC_OMOD_DISABLE; - struct rc_list * writer_list; - struct rc_variable * var; - struct peephole_mul_cb_data cb_data; - unsigned writemask_sum; + unsigned int chan = 0, swz, i; + int const_index = -1; + int temp_index = -1; + float const_value; + rc_omod_op omod_op = RC_OMOD_DISABLE; + struct rc_list *writer_list; + struct rc_variable *var; + struct peephole_mul_cb_data cb_data; + unsigned writemask_sum; - for (i = 0; i < 2; i++) { - unsigned int j; - if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT - && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY - && inst_mul->U.I.SrcReg[i].File != RC_FILE_NONE) { - return 0; - } + for (i = 0; i < 2; i++) { + unsigned int j; + if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT && + inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY && + inst_mul->U.I.SrcReg[i].File != RC_FILE_NONE) { + return 0; + } - /* The only relevant case with constant swizzles we should check for - * is multiply by one half. - */ - if (inst_mul->U.I.SrcReg[i].File == RC_FILE_NONE) { - for (j = 0; j < 4; j++) { - swz = GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); - if (swz == RC_SWIZZLE_UNUSED) { - continue; - } - if (swz != RC_SWIZZLE_HALF) { - return 0; - } else { - omod_op = RC_OMOD_DIV_2; - } - } - } + /* The only relevant case with constant swizzles we should check for + * is multiply by one half. 
+ */ + if (inst_mul->U.I.SrcReg[i].File == RC_FILE_NONE) { + for (j = 0; j < 4; j++) { + swz = GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) { + continue; + } + if (swz != RC_SWIZZLE_HALF) { + return 0; + } else { + omod_op = RC_OMOD_DIV_2; + } + } + } - if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - if (temp_index != -1) { - /* The instruction has two temp sources */ - return 0; - } else { - temp_index = i; - continue; - } - } - /* If we get this far Src[i] must be a constant src */ - if (inst_mul->U.I.SrcReg[i].Negate) { - return 0; - } - /* The constant src needs to read from the same swizzle */ - swz = RC_SWIZZLE_UNUSED; - chan = 0; - for (j = 0; j < 4; j++) { - unsigned int j_swz = - GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); - if (j_swz == RC_SWIZZLE_UNUSED) { - continue; - } - if (swz == RC_SWIZZLE_UNUSED) { - swz = j_swz; - chan = j; - } else if (j_swz != swz) { - return 0; - } - } + if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (temp_index != -1) { + /* The instruction has two temp sources */ + return 0; + } else { + temp_index = i; + continue; + } + } + /* If we get this far Src[i] must be a constant src */ + if (inst_mul->U.I.SrcReg[i].Negate) { + return 0; + } + /* The constant src needs to read from the same swizzle */ + swz = RC_SWIZZLE_UNUSED; + chan = 0; + for (j = 0; j < 4; j++) { + unsigned int j_swz = GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); + if (j_swz == RC_SWIZZLE_UNUSED) { + continue; + } + if (swz == RC_SWIZZLE_UNUSED) { + swz = j_swz; + chan = j; + } else if (j_swz != swz) { + return 0; + } + } - if (const_index != -1) { - /* The instruction has two constant sources */ - return 0; - } else { - const_index = i; - } - } + if (const_index != -1) { + /* The instruction has two constant sources */ + return 0; + } else { + const_index = i; + } + } - if (omod_op == RC_OMOD_DISABLE) { - if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, - 
inst_mul->U.I.SrcReg[const_index].Index)) { - return 0; - } - const_value = rc_get_constant_value(c, - inst_mul->U.I.SrcReg[const_index].Index, - inst_mul->U.I.SrcReg[const_index].Swizzle, - inst_mul->U.I.SrcReg[const_index].Negate, - chan); + if (omod_op == RC_OMOD_DISABLE) { + if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, + inst_mul->U.I.SrcReg[const_index].Index)) { + return 0; + } + const_value = rc_get_constant_value(c, inst_mul->U.I.SrcReg[const_index].Index, + inst_mul->U.I.SrcReg[const_index].Swizzle, + inst_mul->U.I.SrcReg[const_index].Negate, chan); - if (const_value == 2.0f) { - omod_op = RC_OMOD_MUL_2; - } else if (const_value == 4.0f) { - omod_op = RC_OMOD_MUL_4; - } else if (const_value == 8.0f) { - omod_op = RC_OMOD_MUL_8; - } else if (const_value == (1.0f / 2.0f)) { - omod_op = RC_OMOD_DIV_2; - } else if (const_value == (1.0f / 4.0f)) { - omod_op = RC_OMOD_DIV_4; - } else if (const_value == (1.0f / 8.0f)) { - omod_op = RC_OMOD_DIV_8; - } else { - return 0; - } - } + if (const_value == 2.0f) { + omod_op = RC_OMOD_MUL_2; + } else if (const_value == 4.0f) { + omod_op = RC_OMOD_MUL_4; + } else if (const_value == 8.0f) { + omod_op = RC_OMOD_MUL_8; + } else if (const_value == (1.0f / 2.0f)) { + omod_op = RC_OMOD_DIV_2; + } else if (const_value == (1.0f / 4.0f)) { + omod_op = RC_OMOD_DIV_4; + } else if (const_value == (1.0f / 8.0f)) { + omod_op = RC_OMOD_DIV_8; + } else { + return 0; + } + } - writer_list = rc_variable_list_get_writers_one_reader(var_list, - RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); + writer_list = rc_variable_list_get_writers_one_reader(var_list, RC_INSTRUCTION_NORMAL, + &inst_mul->U.I.SrcReg[temp_index]); - if (!writer_list) { - return 0; - } + if (!writer_list) { + return 0; + } - cb_data.Clobbered = 0; - cb_data.Writer = &inst_mul->U.I.DstReg; - for (var = writer_list->Item; var; var = var->Friend) { - struct rc_instruction * inst; - const struct rc_opcode_info * info = rc_get_opcode_info( - 
var->Inst->U.I.Opcode); - if (info->HasTexture) { - return 0; - } - if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { - return 0; - } + cb_data.Clobbered = 0; + cb_data.Writer = &inst_mul->U.I.DstReg; + for (var = writer_list->Item; var; var = var->Friend) { + struct rc_instruction *inst; + const struct rc_opcode_info *info = rc_get_opcode_info(var->Inst->U.I.Opcode); + if (info->HasTexture) { + return 0; + } + if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { + return 0; + } - /* Empirical testing shows that DDX/DDY directly into output - * with non-identity omod is problematic. - */ - if ((info->Opcode == RC_OPCODE_DDX || info->Opcode == RC_OPCODE_DDY) && - inst_mul->U.I.DstReg.File == RC_FILE_OUTPUT) { - return 0; - } + /* Empirical testing shows that DDX/DDY directly into output + * with non-identity omod is problematic. + */ + if ((info->Opcode == RC_OPCODE_DDX || info->Opcode == RC_OPCODE_DDY) && + inst_mul->U.I.DstReg.File == RC_FILE_OUTPUT) { + return 0; + } - for (inst = inst_mul->Prev; inst != var->Inst; - inst = inst->Prev) { - rc_for_all_reads_mask(inst, omod_filter_reader_cb, - &cb_data); - rc_for_all_writes_mask(inst, omod_filter_writer_cb, - &cb_data); - if (cb_data.Clobbered) { - break; - } - } - } + for (inst = inst_mul->Prev; inst != var->Inst; inst = inst->Prev) { + rc_for_all_reads_mask(inst, omod_filter_reader_cb, &cb_data); + rc_for_all_writes_mask(inst, omod_filter_writer_cb, &cb_data); + if (cb_data.Clobbered) { + break; + } + } + } - if (cb_data.Clobbered) { - return 0; - } + if (cb_data.Clobbered) { + return 0; + } - writemask_sum = rc_variable_writemask_sum(writer_list->Item); + writemask_sum = rc_variable_writemask_sum(writer_list->Item); - /* rc_normal_rewrite_writemask can't expand a previous writemask to store - * more channels replicated. 
- */ - if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask)) - return 0; + /* rc_normal_rewrite_writemask can't expand a previous writemask to store + * more channels replicated. + */ + if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask)) + return 0; - /* Rewrite the instructions */ - for (var = writer_list->Item; var; var = var->Friend) { - struct rc_variable * writer = var; - unsigned conversion_swizzle = RC_SWIZZLE_UUUU; - for (chan = 0; chan < 4; chan++) { - unsigned swz = GET_SWZ(inst_mul->U.I.SrcReg[temp_index].Swizzle, chan); - if (swz <= RC_SWIZZLE_W) - SET_SWZ(conversion_swizzle, swz, chan); - } - writer->Inst->U.I.Omod = omod_op; - writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; - writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; - rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); - writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; - } + /* Rewrite the instructions */ + for (var = writer_list->Item; var; var = var->Friend) { + struct rc_variable *writer = var; + unsigned conversion_swizzle = RC_SWIZZLE_UUUU; + for (chan = 0; chan < 4; chan++) { + unsigned swz = GET_SWZ(inst_mul->U.I.SrcReg[temp_index].Swizzle, chan); + if (swz <= RC_SWIZZLE_W) + SET_SWZ(conversion_swizzle, swz, chan); + } + writer->Inst->U.I.Omod = omod_op; + writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; + writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; + rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); + writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; + } - rc_remove_instruction(inst_mul); + rc_remove_instruction(inst_mul); - return 1; + return 1; } /** @@ -879,177 +824,173 @@ static int peephole_mul_omod( int rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data) { - switch(inst->U.I.Opcode) { - case RC_OPCODE_ADD: - { - if (peephole_add_presub_inv(c, inst)) - return 1; - if (peephole_add_presub_add(c, 
inst)) - return 1; - break; - } - case RC_OPCODE_MAD: - { - if (peephole_mad_presub_bias(c, inst)) - return 1; - break; - } - default: - break; - } - return 0; + switch (inst->U.I.Opcode) { + case RC_OPCODE_ADD: { + if (peephole_add_presub_inv(c, inst)) + return 1; + if (peephole_add_presub_add(c, inst)) + return 1; + break; + } + case RC_OPCODE_MAD: { + if (peephole_mad_presub_bias(c, inst)) + return 1; + break; + } + default: + break; + } + return 0; } -static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) +static unsigned int +merge_swizzles(unsigned int swz1, unsigned int swz2) { - unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); - for (unsigned int chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(swz1, chan); - if (swz != RC_SWIZZLE_UNUSED) { - SET_SWZ(new_swz, chan, swz); - continue; - } - swz = GET_SWZ(swz2, chan); - SET_SWZ(new_swz, chan, swz); - } - return new_swz; + unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (unsigned int chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swz1, chan); + if (swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(new_swz, chan, swz); + continue; + } + swz = GET_SWZ(swz2, chan); + SET_SWZ(new_swz, chan, swz); + } + return new_swz; } /* Sets negate to 0 for unused channels. 
*/ -static unsigned int clean_negate(struct rc_src_register src) +static unsigned int +clean_negate(struct rc_src_register src) { - unsigned int new_negate = 0; - for (unsigned int chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(src.Swizzle, chan); - if (swz != RC_SWIZZLE_UNUSED) - new_negate |= src.Negate & (1 << chan); - } - return new_negate; + unsigned int new_negate = 0; + for (unsigned int chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz != RC_SWIZZLE_UNUSED) + new_negate |= src.Negate & (1 << chan); + } + return new_negate; } -static unsigned int merge_negates(struct rc_src_register src1, struct rc_src_register src2) +static unsigned int +merge_negates(struct rc_src_register src1, struct rc_src_register src2) { - return clean_negate(src1) | clean_negate(src2); + return clean_negate(src1) | clean_negate(src2); } -static unsigned int fill_swizzle(unsigned int orig_swz, unsigned int wmask, unsigned int const_swz) +static unsigned int +fill_swizzle(unsigned int orig_swz, unsigned int wmask, unsigned int const_swz) { - for (unsigned int chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(orig_swz, chan); - if (swz == RC_SWIZZLE_UNUSED && (wmask & (1 << chan))) { - SET_SWZ(orig_swz, chan, const_swz); - } - } - return orig_swz; + for (unsigned int chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(orig_swz, chan); + if (swz == RC_SWIZZLE_UNUSED && (wmask & (1 << chan))) { + SET_SWZ(orig_swz, chan, const_swz); + } + } + return orig_swz; } -static int have_shared_source(struct rc_instruction * inst1, struct rc_instruction * inst2) +static int +have_shared_source(struct rc_instruction *inst1, struct rc_instruction *inst2) { - int shared_src = -1; - const struct rc_opcode_info * opcode1 = rc_get_opcode_info(inst1->U.I.Opcode); - const struct rc_opcode_info * opcode2 = rc_get_opcode_info(inst2->U.I.Opcode); - for (unsigned i = 0; i < opcode1->NumSrcRegs; i++) { - for (unsigned j = 0; j < 
opcode2->NumSrcRegs; j++) { - if (inst1->U.I.SrcReg[i].File == inst2->U.I.SrcReg[j].File && - inst1->U.I.SrcReg[i].Index == inst2->U.I.SrcReg[j].Index && - inst1->U.I.SrcReg[i].RelAddr == inst2->U.I.SrcReg[j].RelAddr && - inst1->U.I.SrcReg[i].Abs == inst2->U.I.SrcReg[j].Abs) - shared_src = i; - } - } - return shared_src; + int shared_src = -1; + const struct rc_opcode_info *opcode1 = rc_get_opcode_info(inst1->U.I.Opcode); + const struct rc_opcode_info *opcode2 = rc_get_opcode_info(inst2->U.I.Opcode); + for (unsigned i = 0; i < opcode1->NumSrcRegs; i++) { + for (unsigned j = 0; j < opcode2->NumSrcRegs; j++) { + if (inst1->U.I.SrcReg[i].File == inst2->U.I.SrcReg[j].File && + inst1->U.I.SrcReg[i].Index == inst2->U.I.SrcReg[j].Index && + inst1->U.I.SrcReg[i].RelAddr == inst2->U.I.SrcReg[j].RelAddr && + inst1->U.I.SrcReg[i].Abs == inst2->U.I.SrcReg[j].Abs) + shared_src = i; + } + } + return shared_src; } /** * Merges two MOVs writing different channels of the same destination register * with the use of the constant swizzles. */ -static bool merge_movs( - struct radeon_compiler * c, - struct rc_instruction * inst, - struct rc_instruction * cur) +static bool +merge_movs(struct radeon_compiler *c, struct rc_instruction *inst, struct rc_instruction *cur) { - /* We can merge two MOVs into MOV if one of them is from inline constant, - * i.e., constant swizzles and RC_FILE_NONE). 
- * - * For example - * MOV temp[0].x none.1___ - * MOV temp[0].y input[0]._x__ - * - * becomes - * MOV temp[0].xy input[0].1x__ - */ - unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; - if (cur->U.I.SrcReg[0].File == RC_FILE_NONE || - inst->U.I.SrcReg[0].File == RC_FILE_NONE) { - struct rc_src_register src; - if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) - src = inst->U.I.SrcReg[0]; - else - src = cur->U.I.SrcReg[0]; - src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, - inst->U.I.SrcReg[0].Swizzle); - src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); - if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { - cur->U.I.DstReg.WriteMask |= orig_dst_wmask; - cur->U.I.SrcReg[0] = src; - rc_remove_instruction(inst); - return true; - } - } + /* We can merge two MOVs into MOV if one of them is from inline constant, + * i.e., constant swizzles and RC_FILE_NONE). + * + * For example + * MOV temp[0].x none.1___ + * MOV temp[0].y input[0]._x__ + * + * becomes + * MOV temp[0].xy input[0].1x__ + */ + unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; + if (cur->U.I.SrcReg[0].File == RC_FILE_NONE || inst->U.I.SrcReg[0].File == RC_FILE_NONE) { + struct rc_src_register src; + if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) + src = inst->U.I.SrcReg[0]; + else + src = cur->U.I.SrcReg[0]; + src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, inst->U.I.SrcReg[0].Swizzle); + src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); + if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { + cur->U.I.DstReg.WriteMask |= orig_dst_wmask; + cur->U.I.SrcReg[0] = src; + rc_remove_instruction(inst); + return true; + } + } - /* Handle the trivial case where the MOVs share a source. 
- * - * For example - * MOV temp[0].x const[0].x - * MOV temp[0].y const[0].z - * - * becomes - * MOV temp[0].xy const[0].xz - */ - if (have_shared_source(inst, cur) == 0) { - struct rc_src_register src = cur->U.I.SrcReg[0]; - src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); - src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, - inst->U.I.SrcReg[0].Swizzle); + /* Handle the trivial case where the MOVs share a source. + * + * For example + * MOV temp[0].x const[0].x + * MOV temp[0].y const[0].z + * + * becomes + * MOV temp[0].xy const[0].xz + */ + if (have_shared_source(inst, cur) == 0) { + struct rc_src_register src = cur->U.I.SrcReg[0]; + src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]); + src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle, inst->U.I.SrcReg[0].Swizzle); - if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { - cur->U.I.DstReg.WriteMask |= orig_dst_wmask; - cur->U.I.SrcReg[0] = src; - rc_remove_instruction(inst); - return true; - } - } + if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) { + cur->U.I.DstReg.WriteMask |= orig_dst_wmask; + cur->U.I.SrcReg[0] = src; + rc_remove_instruction(inst); + return true; + } + } - /* Otherwise, we can convert the MOVs into ADD. - * - * For example - * MOV temp[0].x const[0].x - * MOV temp[0].y input[0].y - * - * becomes - * ADD temp[0].xy const[0].x0 input[0].0y - */ - unsigned wmask = cur->U.I.DstReg.WriteMask | orig_dst_wmask; - struct rc_src_register src0 = inst->U.I.SrcReg[0]; - struct rc_src_register src1 = cur->U.I.SrcReg[0]; + /* Otherwise, we can convert the MOVs into ADD. 
+ * + * For example + * MOV temp[0].x const[0].x + * MOV temp[0].y input[0].y + * + * becomes + * ADD temp[0].xy const[0].x0 input[0].0y + */ + unsigned wmask = cur->U.I.DstReg.WriteMask | orig_dst_wmask; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = cur->U.I.SrcReg[0]; - src0.Swizzle = fill_swizzle(src0.Swizzle, - wmask, RC_SWIZZLE_ZERO); - src1.Swizzle = fill_swizzle(src1.Swizzle, - wmask, RC_SWIZZLE_ZERO); - if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src0) || - !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src1)) - return false; + src0.Swizzle = fill_swizzle(src0.Swizzle, wmask, RC_SWIZZLE_ZERO); + src1.Swizzle = fill_swizzle(src1.Swizzle, wmask, RC_SWIZZLE_ZERO); + if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src0) || + !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src1)) + return false; - cur->U.I.DstReg.WriteMask = wmask; - cur->U.I.Opcode = RC_OPCODE_ADD; - cur->U.I.SrcReg[0] = src0; - cur->U.I.SrcReg[1] = src1; + cur->U.I.DstReg.WriteMask = wmask; + cur->U.I.Opcode = RC_OPCODE_ADD; + cur->U.I.SrcReg[0] = src0; + cur->U.I.SrcReg[1] = src1; - /* finally delete the original mov */ - rc_remove_instruction(inst); - return true; + /* finally delete the original mov */ + rc_remove_instruction(inst); + return true; } /** @@ -1063,84 +1004,79 @@ static bool merge_movs( * becomes * MAD temp[0].xyz const[1].0yz const[2].0yz const[0].x00 */ -static int merge_mov_add_mul( - struct radeon_compiler * c, - struct rc_instruction * inst1, - struct rc_instruction * inst2) +static int +merge_mov_add_mul(struct radeon_compiler *c, struct rc_instruction *inst1, + struct rc_instruction *inst2) { - struct rc_instruction * inst, * mov; - if (inst1->U.I.Opcode == RC_OPCODE_MOV) { - mov = inst1; - inst = inst2; - } else { - mov = inst2; - inst = inst1; - } + struct rc_instruction *inst, *mov; + if (inst1->U.I.Opcode == RC_OPCODE_MOV) { + mov = inst1; + inst = inst2; + } else { + mov = inst2; + inst = inst1; + } - const bool is_mul = inst->U.I.Opcode == 
RC_OPCODE_MUL; - int shared_index = have_shared_source(inst, mov); - unsigned wmask = mov->U.I.DstReg.WriteMask | inst->U.I.DstReg.WriteMask; + const bool is_mul = inst->U.I.Opcode == RC_OPCODE_MUL; + int shared_index = have_shared_source(inst, mov); + unsigned wmask = mov->U.I.DstReg.WriteMask | inst->U.I.DstReg.WriteMask; - /* If there is a shared source, just merge the swizzles and be done with it. */ - if (shared_index != -1) { - struct rc_src_register shared_src = inst->U.I.SrcReg[shared_index]; - struct rc_src_register other_src = inst->U.I.SrcReg[1 - shared_index]; + /* If there is a shared source, just merge the swizzles and be done with it. */ + if (shared_index != -1) { + struct rc_src_register shared_src = inst->U.I.SrcReg[shared_index]; + struct rc_src_register other_src = inst->U.I.SrcReg[1 - shared_index]; - shared_src.Negate = merge_negates(mov->U.I.SrcReg[0], shared_src); - shared_src.Swizzle = merge_swizzles(shared_src.Swizzle, - mov->U.I.SrcReg[0].Swizzle); - other_src.Negate = clean_negate(other_src); - unsigned int swz = is_mul ? RC_SWIZZLE_ONE : RC_SWIZZLE_ZERO; - other_src.Swizzle = fill_swizzle(other_src.Swizzle, wmask, swz); + shared_src.Negate = merge_negates(mov->U.I.SrcReg[0], shared_src); + shared_src.Swizzle = merge_swizzles(shared_src.Swizzle, mov->U.I.SrcReg[0].Swizzle); + other_src.Negate = clean_negate(other_src); + unsigned int swz = is_mul ? 
RC_SWIZZLE_ONE : RC_SWIZZLE_ZERO; + other_src.Swizzle = fill_swizzle(other_src.Swizzle, wmask, swz); - if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, shared_src) || - !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, other_src)) - return 0; + if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, shared_src) || + !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, other_src)) + return 0; - inst2->U.I.Opcode = inst->U.I.Opcode; - inst2->U.I.SrcReg[0] = shared_src; - inst2->U.I.SrcReg[1] = other_src; + inst2->U.I.Opcode = inst->U.I.Opcode; + inst2->U.I.SrcReg[0] = shared_src; + inst2->U.I.SrcReg[1] = other_src; - /* TODO: we can do a bit better in the special case when one of the sources is none. - * Convert to MAD otherwise. - */ - } else { - struct rc_src_register src0, src1, src2; - if (is_mul) { - src2 = mov->U.I.SrcReg[0]; - src0 = inst->U.I.SrcReg[0]; - src1 = inst->U.I.SrcReg[1]; - } else { - src0 = mov->U.I.SrcReg[0]; - src1 = inst->U.I.SrcReg[0]; - src2 = inst->U.I.SrcReg[1]; - } - /* The following login expects that the unused channels have empty negate bits. */ - src0.Negate = clean_negate(src0); - src1.Negate = clean_negate(src1); - src2.Negate = clean_negate(src2); + /* TODO: we can do a bit better in the special case when one of the sources is none. + * Convert to MAD otherwise. + */ + } else { + struct rc_src_register src0, src1, src2; + if (is_mul) { + src2 = mov->U.I.SrcReg[0]; + src0 = inst->U.I.SrcReg[0]; + src1 = inst->U.I.SrcReg[1]; + } else { + src0 = mov->U.I.SrcReg[0]; + src1 = inst->U.I.SrcReg[0]; + src2 = inst->U.I.SrcReg[1]; + } + /* The following login expects that the unused channels have empty negate bits. */ + src0.Negate = clean_negate(src0); + src1.Negate = clean_negate(src1); + src2.Negate = clean_negate(src2); - src0.Swizzle = fill_swizzle(src0.Swizzle, - wmask, RC_SWIZZLE_ONE); - src1.Swizzle = fill_swizzle(src1.Swizzle, - wmask, is_mul ? 
RC_SWIZZLE_ZERO : RC_SWIZZLE_ONE); - src2.Swizzle = fill_swizzle(src2.Swizzle, - wmask, RC_SWIZZLE_ZERO); - if (!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src0) || - !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src1) || - !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src2)) - return 0; + src0.Swizzle = fill_swizzle(src0.Swizzle, wmask, RC_SWIZZLE_ONE); + src1.Swizzle = fill_swizzle(src1.Swizzle, wmask, is_mul ? RC_SWIZZLE_ZERO : RC_SWIZZLE_ONE); + src2.Swizzle = fill_swizzle(src2.Swizzle, wmask, RC_SWIZZLE_ZERO); + if (!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src0) || + !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src1) || + !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src2)) + return 0; - inst2->U.I.Opcode = RC_OPCODE_MAD; - inst2->U.I.SrcReg[0] = src0; - inst2->U.I.SrcReg[1] = src1; - inst2->U.I.SrcReg[2] = src2; - } - inst2->U.I.DstReg.WriteMask = wmask; - /* finally delete the original instruction */ - rc_remove_instruction(inst1); + inst2->U.I.Opcode = RC_OPCODE_MAD; + inst2->U.I.SrcReg[0] = src0; + inst2->U.I.SrcReg[1] = src1; + inst2->U.I.SrcReg[2] = src2; + } + inst2->U.I.DstReg.WriteMask = wmask; + /* finally delete the original instruction */ + rc_remove_instruction(inst1); - return 1; + return 1; } /** @@ -1155,93 +1091,88 @@ static int merge_mov_add_mul( * becomes * MAD temp[0].xyz const[0].xyz const[2].1yz input[0].0xw */ -static bool merge_mov_mad( - struct radeon_compiler * c, - struct rc_instruction * inst1, - struct rc_instruction * inst2) +static bool +merge_mov_mad(struct radeon_compiler *c, struct rc_instruction *inst1, struct rc_instruction *inst2) { - struct rc_instruction * mov, * mad; - if (inst1->U.I.Opcode == RC_OPCODE_MOV) { - mov = inst1; - mad = inst2; - } else { - mov = inst2; - mad = inst1; - } + struct rc_instruction *mov, *mad; + if (inst1->U.I.Opcode == RC_OPCODE_MOV) { + mov = inst1; + mad = inst2; + } else { + mov = inst2; + mad = inst1; + } - int shared_index = have_shared_source(mad, mov); - unsigned wmask = mov->U.I.DstReg.WriteMask | 
mad->U.I.DstReg.WriteMask; - struct rc_src_register src[3]; - src[0] = mad->U.I.SrcReg[0]; - src[1] = mad->U.I.SrcReg[1]; - src[2] = mad->U.I.SrcReg[2]; + int shared_index = have_shared_source(mad, mov); + unsigned wmask = mov->U.I.DstReg.WriteMask | mad->U.I.DstReg.WriteMask; + struct rc_src_register src[3]; + src[0] = mad->U.I.SrcReg[0]; + src[1] = mad->U.I.SrcReg[1]; + src[2] = mad->U.I.SrcReg[2]; - /* Shared source is the one for multiplication. */ - if (shared_index == 0 || shared_index == 1) { - src[shared_index].Negate = merge_negates(src[shared_index], mov->U.I.SrcReg[0]); - src[1 - shared_index].Negate = clean_negate(src[1 - shared_index]); - src[shared_index].Swizzle = merge_swizzles(src[shared_index].Swizzle, - mov->U.I.SrcReg[0].Swizzle); - src[1 - shared_index].Swizzle = fill_swizzle( - src[1 - shared_index].Swizzle, wmask, RC_SWIZZLE_ONE); - src[2].Swizzle = fill_swizzle(src[2].Swizzle, wmask, RC_SWIZZLE_ZERO); + /* Shared source is the one for multiplication. */ + if (shared_index == 0 || shared_index == 1) { + src[shared_index].Negate = merge_negates(src[shared_index], mov->U.I.SrcReg[0]); + src[1 - shared_index].Negate = clean_negate(src[1 - shared_index]); + src[shared_index].Swizzle = + merge_swizzles(src[shared_index].Swizzle, mov->U.I.SrcReg[0].Swizzle); + src[1 - shared_index].Swizzle = + fill_swizzle(src[1 - shared_index].Swizzle, wmask, RC_SWIZZLE_ONE); + src[2].Swizzle = fill_swizzle(src[2].Swizzle, wmask, RC_SWIZZLE_ZERO); - /* Shared source is the one for used for addition, or it is none. Additionally, - * if the mov SrcReg is none, we merge it with the addition (third) reg as well - * because than we have the highest change the swizzles will be legal. 
- */ - } else if (shared_index == 2 || mov->U.I.SrcReg[0].File == RC_FILE_NONE || - src[2].File == RC_FILE_NONE) { - src[2].Negate = merge_negates(src[2], mov->U.I.SrcReg[0]); - src[2].Swizzle = merge_swizzles(src[2].Swizzle, mov->U.I.SrcReg[0].Swizzle); - src[0].Swizzle = fill_swizzle(src[0].Swizzle, wmask, RC_SWIZZLE_ZERO); - src[1].Swizzle = fill_swizzle(src[1].Swizzle, wmask, RC_SWIZZLE_ZERO); - if (src[2].File == RC_FILE_NONE) { - src[2].File = mov->U.I.SrcReg[0].File; - src[2].Index = mov->U.I.SrcReg[0].Index; - src[2].RelAddr = mov->U.I.SrcReg[0].RelAddr; - src[2].Abs = mov->U.I.SrcReg[0].Abs; - } + /* Shared source is the one for used for addition, or it is none. Additionally, + * if the mov SrcReg is none, we merge it with the addition (third) reg as well + * because than we have the highest change the swizzles will be legal. + */ + } else if (shared_index == 2 || mov->U.I.SrcReg[0].File == RC_FILE_NONE || + src[2].File == RC_FILE_NONE) { + src[2].Negate = merge_negates(src[2], mov->U.I.SrcReg[0]); + src[2].Swizzle = merge_swizzles(src[2].Swizzle, mov->U.I.SrcReg[0].Swizzle); + src[0].Swizzle = fill_swizzle(src[0].Swizzle, wmask, RC_SWIZZLE_ZERO); + src[1].Swizzle = fill_swizzle(src[1].Swizzle, wmask, RC_SWIZZLE_ZERO); + if (src[2].File == RC_FILE_NONE) { + src[2].File = mov->U.I.SrcReg[0].File; + src[2].Index = mov->U.I.SrcReg[0].Index; + src[2].RelAddr = mov->U.I.SrcReg[0].RelAddr; + src[2].Abs = mov->U.I.SrcReg[0].Abs; + } - /* First or the second MAD source is RC_FILE_NONE, we merge the mov into it, - * fill the other one with ones and the reg for addition with zeros. - */ - } else if (src[0].File == RC_FILE_NONE || src[1].File == RC_FILE_NONE) { - unsigned none_src = src[0].File == RC_FILE_NONE ? 
0 : 1; - src[none_src] = mov->U.I.SrcReg[0]; - src[none_src].Negate = merge_negates(src[none_src], mad->U.I.SrcReg[none_src]); - src[none_src].Swizzle = merge_swizzles(src[none_src].Swizzle, - mad->U.I.SrcReg[none_src].Swizzle); - src[1 - none_src].Negate = clean_negate(src[1 - none_src]); - src[1 - none_src].Swizzle = fill_swizzle(src[1 - none_src].Swizzle, - wmask, RC_SWIZZLE_ONE); - src[2].Swizzle = fill_swizzle(src[2].Swizzle, wmask, RC_SWIZZLE_ZERO); - } else { - return false; - } + /* First or the second MAD source is RC_FILE_NONE, we merge the mov into it, + * fill the other one with ones and the reg for addition with zeros. + */ + } else if (src[0].File == RC_FILE_NONE || src[1].File == RC_FILE_NONE) { + unsigned none_src = src[0].File == RC_FILE_NONE ? 0 : 1; + src[none_src] = mov->U.I.SrcReg[0]; + src[none_src].Negate = merge_negates(src[none_src], mad->U.I.SrcReg[none_src]); + src[none_src].Swizzle = + merge_swizzles(src[none_src].Swizzle, mad->U.I.SrcReg[none_src].Swizzle); + src[1 - none_src].Negate = clean_negate(src[1 - none_src]); + src[1 - none_src].Swizzle = fill_swizzle(src[1 - none_src].Swizzle, wmask, RC_SWIZZLE_ONE); + src[2].Swizzle = fill_swizzle(src[2].Swizzle, wmask, RC_SWIZZLE_ZERO); + } else { + return false; + } - if (!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[0]) || - !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[1]) || - !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[2])) - return false; + if (!c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[0]) || + !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[1]) || + !c->SwizzleCaps->IsNative(RC_OPCODE_MAD, src[2])) + return false; - inst2->U.I.Opcode = RC_OPCODE_MAD; - inst2->U.I.SrcReg[0] = src[0]; - inst2->U.I.SrcReg[1] = src[1]; - inst2->U.I.SrcReg[2] = src[2]; - inst2->U.I.DstReg.WriteMask = wmask; - rc_remove_instruction(inst1); - return true; + inst2->U.I.Opcode = RC_OPCODE_MAD; + inst2->U.I.SrcReg[0] = src[0]; + inst2->U.I.SrcReg[1] = src[1]; + inst2->U.I.SrcReg[2] = src[2]; + 
inst2->U.I.DstReg.WriteMask = wmask; + rc_remove_instruction(inst1); + return true; } -static bool inst_combination( - struct rc_instruction * inst1, - struct rc_instruction * inst2, - rc_opcode opcode1, - rc_opcode opcode2) +static bool +inst_combination(struct rc_instruction *inst1, struct rc_instruction *inst2, rc_opcode opcode1, + rc_opcode opcode2) { - return ((inst1->U.I.Opcode == opcode1 && inst2->U.I.Opcode == opcode2) || - (inst2->U.I.Opcode == opcode1 && inst1->U.I.Opcode == opcode2)); + return ((inst1->U.I.Opcode == opcode1 && inst2->U.I.Opcode == opcode2) || + (inst2->U.I.Opcode == opcode1 && inst1->U.I.Opcode == opcode2)); } /** @@ -1250,79 +1181,77 @@ static bool inst_combination( * * The potential candidates are combinations of MOVs, ADDs, MULs and MADs. */ -static void merge_channels(struct radeon_compiler * c, struct rc_instruction * inst) +static void +merge_channels(struct radeon_compiler *c, struct rc_instruction *inst) { - unsigned int orig_dst_reg = inst->U.I.DstReg.Index; - unsigned int orig_dst_file = inst->U.I.DstReg.File; - unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; - const struct rc_opcode_info * orig_opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int orig_dst_reg = inst->U.I.DstReg.Index; + unsigned int orig_dst_file = inst->U.I.DstReg.File; + unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; + const struct rc_opcode_info *orig_opcode = rc_get_opcode_info(inst->U.I.Opcode); - struct rc_instruction * cur = inst; - while (cur!= &c->Program.Instructions) { - cur = cur->Next; - const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode); + struct rc_instruction *cur = inst; + while (cur != &c->Program.Instructions) { + cur = cur->Next; + const struct rc_opcode_info *opcode = rc_get_opcode_info(cur->U.I.Opcode); - /* Keep it simple for now and stop when encountering any - * control flow. 
- */ - if (opcode->IsFlowControl) - return; + /* Keep it simple for now and stop when encountering any + * control flow. + */ + if (opcode->IsFlowControl) + return; - /* Stop when the original destination is overwritten */ - if (orig_dst_reg == cur->U.I.DstReg.Index && - orig_dst_file == cur->U.I.DstReg.File && - (orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0) - return; + /* Stop when the original destination is overwritten */ + if (orig_dst_reg == cur->U.I.DstReg.Index && orig_dst_file == cur->U.I.DstReg.File && + (orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0) + return; - /* Stop the search when the original instruction destination - * is used as a source for anything. - */ - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (cur->U.I.SrcReg[i].File == orig_dst_file && - cur->U.I.SrcReg[i].Index == orig_dst_reg) - return; - } + /* Stop the search when the original instruction destination + * is used as a source for anything. + */ + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (cur->U.I.SrcReg[i].File == orig_dst_file && cur->U.I.SrcReg[i].Index == orig_dst_reg) + return; + } - /* Stop the search when some of the original sources are touched. */ - for (unsigned i = 0; i < orig_opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == cur->U.I.DstReg.File && - inst->U.I.SrcReg[i].Index == cur->U.I.DstReg.Index) - return; - } + /* Stop the search when some of the original sources are touched. 
*/ + for (unsigned i = 0; i < orig_opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == cur->U.I.DstReg.File && + inst->U.I.SrcReg[i].Index == cur->U.I.DstReg.Index) + return; + } - if (cur->U.I.DstReg.File == orig_dst_file && - cur->U.I.DstReg.Index == orig_dst_reg && - cur->U.I.SaturateMode == inst->U.I.SaturateMode && - (cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) { + if (cur->U.I.DstReg.File == orig_dst_file && cur->U.I.DstReg.Index == orig_dst_reg && + cur->U.I.SaturateMode == inst->U.I.SaturateMode && + (cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) { - if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MOV)) { - if (merge_movs(c, inst, cur)) - return; - } + if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MOV)) { + if (merge_movs(c, inst, cur)) + return; + } - /* Skip the merge if one of the instructions writes just w channel - * and we are compiling a fragment shader. We can pair-schedule it together - * later anyway and it will also give the scheduler a bit more flexibility. - * Only check this after merging MOVs as when we manage to merge two MOVs - * into another MOV we can still copy propagate it away. So it is a win in - * that case. - */ - if (c->has_omod && (cur->U.I.DstReg.WriteMask == RC_MASK_W || - inst->U.I.DstReg.WriteMask == RC_MASK_W)) - continue; + /* Skip the merge if one of the instructions writes just w channel + * and we are compiling a fragment shader. We can pair-schedule it together + * later anyway and it will also give the scheduler a bit more flexibility. + * Only check this after merging MOVs as when we manage to merge two MOVs + * into another MOV we can still copy propagate it away. So it is a win in + * that case. 
+ */ + if (c->has_omod && + (cur->U.I.DstReg.WriteMask == RC_MASK_W || inst->U.I.DstReg.WriteMask == RC_MASK_W)) + continue; - if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_ADD) || - inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MUL)) { - if (merge_mov_add_mul(c, inst, cur)) - return; - } + if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_ADD) || + inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MUL)) { + if (merge_mov_add_mul(c, inst, cur)) + return; + } - if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MAD)) { - if (merge_mov_mad(c, inst, cur)) - return; - } - } - } + if (inst_combination(cur, inst, RC_OPCODE_MOV, RC_OPCODE_MAD)) { + if (merge_mov_mad(c, inst, cur)) + return; + } + } + } } /** @@ -1331,70 +1260,63 @@ static void merge_channels(struct radeon_compiler * c, struct rc_instruction * i * Only a very trivial case is now optimized where if a second one is detected which reads from * the same register as the first one and source is the same, just remove the second one. 
*/ -static void merge_A0_loads( - struct radeon_compiler * c, - struct rc_instruction * inst, - bool is_ARL) +static void +merge_A0_loads(struct radeon_compiler *c, struct rc_instruction *inst, bool is_ARL) { - unsigned int A0_src_reg = inst->U.I.SrcReg[0].Index; - unsigned int A0_src_file = inst->U.I.SrcReg[0].File; - unsigned int A0_src_swizzle = inst->U.I.SrcReg[0].Swizzle; - int cf_depth = 0; + unsigned int A0_src_reg = inst->U.I.SrcReg[0].Index; + unsigned int A0_src_file = inst->U.I.SrcReg[0].File; + unsigned int A0_src_swizzle = inst->U.I.SrcReg[0].Swizzle; + int cf_depth = 0; - struct rc_instruction * cur = inst; - while (cur != &c->Program.Instructions) { - cur = cur->Next; - const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode); + struct rc_instruction *cur = inst; + while (cur != &c->Program.Instructions) { + cur = cur->Next; + const struct rc_opcode_info *opcode = rc_get_opcode_info(cur->U.I.Opcode); - /* Keep it simple for now and stop when encountering any - * control flow besides simple ifs. - */ - if (opcode->IsFlowControl) { - switch (cur->U.I.Opcode) { - case RC_OPCODE_IF: - { - cf_depth++; - break; - } - case RC_OPCODE_ELSE: - { - if (cf_depth < 1) - return; - break; - } - case RC_OPCODE_ENDIF: - { - cf_depth--; - break; - } - default: - return; - } - } + /* Keep it simple for now and stop when encountering any + * control flow besides simple ifs. 
+ */ + if (opcode->IsFlowControl) { + switch (cur->U.I.Opcode) { + case RC_OPCODE_IF: { + cf_depth++; + break; + } + case RC_OPCODE_ELSE: { + if (cf_depth < 1) + return; + break; + } + case RC_OPCODE_ENDIF: { + cf_depth--; + break; + } + default: + return; + } + } - /* Stop when the original source is overwritten */ - if (A0_src_reg == cur->U.I.DstReg.Index && - A0_src_file == cur->U.I.DstReg.File && - cur->U.I.DstReg.WriteMask | rc_swizzle_to_writemask(A0_src_swizzle)) - return; + /* Stop when the original source is overwritten */ + if (A0_src_reg == cur->U.I.DstReg.Index && A0_src_file == cur->U.I.DstReg.File && + cur->U.I.DstReg.WriteMask | rc_swizzle_to_writemask(A0_src_swizzle)) + return; - /* Wrong A0 load type. */ - if ((is_ARL && cur->U.I.Opcode == RC_OPCODE_ARR) || - (!is_ARL && cur->U.I.Opcode == RC_OPCODE_ARL)) - return; + /* Wrong A0 load type. */ + if ((is_ARL && cur->U.I.Opcode == RC_OPCODE_ARR) || + (!is_ARL && cur->U.I.Opcode == RC_OPCODE_ARL)) + return; - if (cur->U.I.Opcode == RC_OPCODE_ARL || cur->U.I.Opcode == RC_OPCODE_ARR) { - if (A0_src_reg == cur->U.I.SrcReg[0].Index && - A0_src_file == cur->U.I.SrcReg[0].File && - A0_src_swizzle == cur->U.I.SrcReg[0].Swizzle) { - struct rc_instruction * next = cur->Next; - rc_remove_instruction(cur); - cur = next; - } else { - return; - } - } - } + if (cur->U.I.Opcode == RC_OPCODE_ARL || cur->U.I.Opcode == RC_OPCODE_ARR) { + if (A0_src_reg == cur->U.I.SrcReg[0].Index && A0_src_file == cur->U.I.SrcReg[0].File && + A0_src_swizzle == cur->U.I.SrcReg[0].Swizzle) { + struct rc_instruction *next = cur->Next; + rc_remove_instruction(cur); + cur = next; + } else { + return; + } + } + } } /** @@ -1403,112 +1325,113 @@ static void merge_A0_loads( * is lowered ARR (from nine->ttn). In that case we want to reconstruct * the ARR instead of lowering the round. 
*/ -static void transform_vertex_ROUND(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_vertex_ROUND(struct radeon_compiler *c, struct rc_instruction *inst) { - struct rc_reader_data readers; - rc_get_readers(c, inst, &readers, NULL, NULL, NULL); + struct rc_reader_data readers; + rc_get_readers(c, inst, &readers, NULL, NULL, NULL); - assert(readers.ReaderCount > 0); - for (unsigned i = 0; i < readers.ReaderCount; i++) { - struct rc_instruction *reader = readers.Readers[i].Inst; - if (reader->U.I.Opcode != RC_OPCODE_ARL) { - assert(!"Unable to convert ROUND+ARL to ARR\n"); - return; - } - } + assert(readers.ReaderCount > 0); + for (unsigned i = 0; i < readers.ReaderCount; i++) { + struct rc_instruction *reader = readers.Readers[i].Inst; + if (reader->U.I.Opcode != RC_OPCODE_ARL) { + assert(!"Unable to convert ROUND+ARL to ARR\n"); + return; + } + } - /* Only ARL readers, convert all to ARR */ - for (unsigned i = 0; i < readers.ReaderCount; i++) { - readers.Readers[i].Inst->U.I.Opcode = RC_OPCODE_ARR; - } - /* Switch ROUND to MOV and let copy propagate sort it out later. */ - inst->U.I.Opcode = RC_OPCODE_MOV; + /* Only ARL readers, convert all to ARR */ + for (unsigned i = 0; i < readers.ReaderCount; i++) { + readers.Readers[i].Inst->U.I.Opcode = RC_OPCODE_ARR; + } + /* Switch ROUND to MOV and let copy propagate sort it out later. */ + inst->U.I.Opcode = RC_OPCODE_MOV; } /** * Apply various optimizations specific to the A0 address register loads. 
*/ -static void optimize_A0_loads(struct radeon_compiler * c) { - struct rc_instruction * inst = c->Program.Instructions.Next; - - while (inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_ARL) { - merge_A0_loads(c, cur, true); - } else if (cur->U.I.Opcode == RC_OPCODE_ARR) { - merge_A0_loads(c, cur, false); - } else if (cur->U.I.Opcode == RC_OPCODE_ROUND) { - transform_vertex_ROUND(c, cur); - } - } -} - -void rc_optimize(struct radeon_compiler * c, void *user) +static void +optimize_A0_loads(struct radeon_compiler *c) { - struct rc_instruction * inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - constant_folding(c, cur); - } + struct rc_instruction *inst = c->Program.Instructions.Next; - /* Copy propagate simple movs away. */ - inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_MOV) { - copy_propagate(c, cur); - } - } - - if (c->type == RC_VERTEX_PROGRAM) { - optimize_A0_loads(c); - } - - /* Merge MOVs to same source in different channels using the constant - * swizzle. - */ - if (c->is_r500 || c->type == RC_VERTEX_PROGRAM) { - inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_MOV || - cur->U.I.Opcode == RC_OPCODE_ADD || - cur->U.I.Opcode == RC_OPCODE_MAD || - cur->U.I.Opcode == RC_OPCODE_MUL) - merge_channels(c, cur); - } - } - - /* Copy propagate few extra movs from the merge_channels pass. 
*/ - inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_MOV) { - copy_propagate(c, cur); - } - } - - if (c->type != RC_FRAGMENT_PROGRAM) { - return; - } - - /* Output modifiers. */ - inst = c->Program.Instructions.Next; - struct rc_list * var_list = NULL; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - if (cur->U.I.Opcode == RC_OPCODE_MUL) { - if (!var_list) - var_list = rc_get_variables(c); - if (peephole_mul_omod(c, cur, var_list)) - var_list = NULL; - } - } + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_ARL) { + merge_A0_loads(c, cur, true); + } else if (cur->U.I.Opcode == RC_OPCODE_ARR) { + merge_A0_loads(c, cur, false); + } else if (cur->U.I.Opcode == RC_OPCODE_ROUND) { + transform_vertex_ROUND(c, cur); + } + } +} + +void +rc_optimize(struct radeon_compiler *c, void *user) +{ + struct rc_instruction *inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + constant_folding(c, cur); + } + + /* Copy propagate simple movs away. */ + inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + } + } + + if (c->type == RC_VERTEX_PROGRAM) { + optimize_A0_loads(c); + } + + /* Merge MOVs to same source in different channels using the constant + * swizzle. 
+ */ + if (c->is_r500 || c->type == RC_VERTEX_PROGRAM) { + inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_MOV || cur->U.I.Opcode == RC_OPCODE_ADD || + cur->U.I.Opcode == RC_OPCODE_MAD || cur->U.I.Opcode == RC_OPCODE_MUL) + merge_channels(c, cur); + } + } + + /* Copy propagate few extra movs from the merge_channels pass. */ + inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + } + } + + if (c->type != RC_FRAGMENT_PROGRAM) { + return; + } + + /* Output modifiers. */ + inst = c->Program.Instructions.Next; + struct rc_list *var_list = NULL; + while (inst != &c->Program.Instructions) { + struct rc_instruction *cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_MUL) { + if (!var_list) + var_list = rc_get_variables(c); + if (peephole_mul_omod(c, cur, var_list)) + var_list = NULL; + } + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c index 2a0cdb34084..82d96a5ec33 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c @@ -8,59 +8,58 @@ #include "radeon_opcodes.h" #include "radeon_program_pair.h" -static void mark_used_presub(struct rc_pair_sub_instruction * sub) +static void +mark_used_presub(struct rc_pair_sub_instruction *sub) { - if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { - unsigned int presub_reg_count = rc_presubtract_src_reg_count( - sub->Src[RC_PAIR_PRESUB_SRC].Index); - unsigned int i; - for (i = 0; i < presub_reg_count; i++) { - sub->Src[i].Used = 1; - } - } + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = + 
rc_presubtract_src_reg_count(sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } } -static void mark_used( - struct rc_instruction * inst, - struct rc_pair_sub_instruction * sub) +static void +mark_used(struct rc_instruction *inst, struct rc_pair_sub_instruction *sub) { - unsigned int i; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - for (i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); - if (src_type & RC_SOURCE_RGB) { - inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; - } + unsigned int i; + const struct rc_opcode_info *info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } - if (src_type & RC_SOURCE_ALPHA) { - inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; - } - } + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; + } + } } /** * This pass finds sources that are not used by their instruction and marks - * them as unused. + * them as unused. 
*/ -void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +void +rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user) { - struct rc_instruction * inst; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - unsigned int i; - if (inst->Type == RC_INSTRUCTION_NORMAL) - continue; + struct rc_instruction *inst; + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; - /* Mark all sources as unused */ - for (i = 0; i < 4; i++) { - inst->U.P.RGB.Src[i].Used = 0; - inst->U.P.Alpha.Src[i].Used = 0; - } - mark_used(inst, &inst->U.P.RGB); - mark_used(inst, &inst->U.P.Alpha); + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); - mark_used_presub(&inst->U.P.RGB); - mark_used_presub(&inst->U.P.Alpha); - } + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c index caa704db945..46a348d81e6 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -9,9 +9,9 @@ #include #include "util/glheader.h" +#include "util/ralloc.h" #include "util/register_allocate.h" #include "util/u_memory.h" -#include "util/ralloc.h" #include "r300_fragprog_swizzle.h" #include "radeon_compiler.h" @@ -21,59 +21,59 @@ #include "radeon_regalloc.h" #include "radeon_variable.h" -static void scan_read_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void +scan_read_callback(void *data, struct rc_instruction *inst, rc_register_file file, + unsigned int index, 
unsigned int mask) { - struct regalloc_state * s = data; - struct register_info * reg; - unsigned int i; + struct regalloc_state *s = data; + struct register_info *reg; + unsigned int i; - if (file != RC_FILE_INPUT) - return; + if (file != RC_FILE_INPUT) + return; - s->Input[index].Used = 1; - reg = &s->Input[index]; + s->Input[index].Used = 1; + reg = &s->Input[index]; - for (i = 0; i < 4; i++) { - if (!((mask >> i) & 0x1)) { - continue; - } - reg->Live[i].Used = 1; - reg->Live[i].Start = 0; - reg->Live[i].End = - s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; - } + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; + } + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; + } } -static void remap_register(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) +static void +remap_register(void *data, struct rc_instruction *inst, rc_register_file *file, unsigned int *index) { - struct regalloc_state * s = data; - const struct register_info * reg; + struct regalloc_state *s = data; + const struct register_info *reg; - if (*file == RC_FILE_TEMPORARY && s->Simple) - reg = &s->Temporary[*index]; - else if (*file == RC_FILE_INPUT) - reg = &s->Input[*index]; - else - return; + if (*file == RC_FILE_TEMPORARY && s->Simple) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; - if (reg->Allocated) { - *index = reg->Index; - } + if (reg->Allocated) { + *index = reg->Index; + } } -static void alloc_input_simple(void * data, unsigned int input, - unsigned int hwreg) +static void +alloc_input_simple(void *data, unsigned int input, unsigned int hwreg) { - struct regalloc_state * s = data; + struct regalloc_state *s = data; - if (input >= s->NumInputs) - return; + if (input >= s->NumInputs) + return; - s->Input[input].Allocated = 1; - s->Input[input].File = RC_FILE_TEMPORARY; - s->Input[input].Index = 
hwreg; + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; } /* This functions offsets the temporary register indices by the number @@ -82,282 +82,263 @@ static void alloc_input_simple(void * data, unsigned int input, * * This pass is supposed to be used to maintain correct allocation of inputs * if the standard register allocation is disabled. */ -static void do_regalloc_inputs_only(struct regalloc_state * s) +static void +do_regalloc_inputs_only(struct regalloc_state *s) { - for (unsigned i = 0; i < s->NumTemporaries; i++) { - s->Temporary[i].Allocated = 1; - s->Temporary[i].File = RC_FILE_TEMPORARY; - s->Temporary[i].Index = i + s->NumInputs; - } + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } } -static unsigned int is_derivative(rc_opcode op) +static unsigned int +is_derivative(rc_opcode op) { - return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); } struct variable_get_class_cb_data { - unsigned int * can_change_writemask; - unsigned int conversion_swizzle; - struct radeon_compiler * c; + unsigned int *can_change_writemask; + unsigned int conversion_swizzle; + struct radeon_compiler *c; }; -static void variable_get_class_read_cb( - void * userdata, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) +static void +variable_get_class_read_cb(void *userdata, struct rc_instruction *inst, + struct rc_pair_instruction_arg *arg, + struct rc_pair_instruction_source *src) { - struct variable_get_class_cb_data * d = userdata; - unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, - d->conversion_swizzle); - /* We can't just call r300_swizzle_is_native basic here, because it ignores the - * extra requirements for presubtract. 
However, after pair translation we no longer - * have the rc_src_register required for the native swizzle, so we have to - * reconstruct it. */ - struct rc_src_register reg = {}; - reg.Swizzle = new_swizzle; - reg.File = src->File; + struct variable_get_class_cb_data *d = userdata; + unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, d->conversion_swizzle); + /* We can't just call r300_swizzle_is_native basic here, because it ignores the + * extra requirements for presubtract. However, after pair translation we no longer + * have the rc_src_register required for the native swizzle, so we have to + * reconstruct it. */ + struct rc_src_register reg = {}; + reg.Swizzle = new_swizzle; + reg.File = src->File; - assert(inst->Type == RC_INSTRUCTION_PAIR); - /* The opcode is unimportant, we can't have TEX here. */ - if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) { - *d->can_change_writemask = 0; - } + assert(inst->Type == RC_INSTRUCTION_PAIR); + /* The opcode is unimportant, we can't have TEX here. */ + if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) { + *d->can_change_writemask = 0; + } } -static unsigned variable_get_class( - struct rc_variable * variable, - const struct rc_class * classes) +static unsigned +variable_get_class(struct rc_variable *variable, const struct rc_class *classes) { - unsigned int i; - unsigned int can_change_writemask= 1; - unsigned int writemask = rc_variable_writemask_sum(variable); - struct rc_list * readers = rc_variable_readers_union(variable); - int class_index; + unsigned int i; + unsigned int can_change_writemask = 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list *readers = rc_variable_readers_union(variable); + int class_index; - if (!variable->C->is_r500) { - struct rc_class c; - struct rc_variable * var_ptr; - /* The assumption here is that if an instruction has type - * RC_INSTRUCTION_NORMAL then it is a TEX instruction. 
- * r300 and r400 can't swizzle the result of a TEX lookup. */ - for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { - if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { - writemask = RC_MASK_XYZW; - } - } + if (!variable->C->is_r500) { + struct rc_class c; + struct rc_variable *var_ptr; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + } - /* Check if it is possible to do swizzle packing for r300/r400 - * without creating non-native swizzles. */ - class_index = rc_find_class(classes, writemask, 3); - if (class_index < 0) { - goto error; - } - c = classes[class_index]; - if (c.WritemaskCount == 1) { - goto done; - } - for (i = 0; i < c.WritemaskCount; i++) { - struct rc_variable * var_ptr; - for (var_ptr = variable; var_ptr; - var_ptr = var_ptr->Friend) { - int j; - unsigned int conversion_swizzle = - rc_make_conversion_swizzle( - writemask, c.Writemasks[i]); - struct variable_get_class_cb_data d; - d.can_change_writemask = &can_change_writemask; - d.conversion_swizzle = conversion_swizzle; - d.c = variable->C; - /* If we get this far var_ptr->Inst has to - * be a pair instruction. If variable or any - * of its friends are normal instructions, - * then the writemask will be set to RC_MASK_XYZW - * and the function will return before it gets - * here. */ - rc_pair_for_all_reads_arg(var_ptr->Inst, - variable_get_class_read_cb, &d); + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. 
*/ + class_index = rc_find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + if (c.WritemaskCount == 1) { + goto done; + } + for (i = 0; i < c.WritemaskCount; i++) { + struct rc_variable *var_ptr; + for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(writemask, c.Writemasks[i]); + struct variable_get_class_cb_data d; + d.can_change_writemask = &can_change_writemask; + d.conversion_swizzle = conversion_swizzle; + d.c = variable->C; + /* If we get this far var_ptr->Inst has to + * be a pair instruction. If variable or any + * of its friends are normal instructions, + * then the writemask will be set to RC_MASK_XYZW + * and the function will return before it gets + * here. */ + rc_pair_for_all_reads_arg(var_ptr->Inst, variable_get_class_read_cb, &d); - for (j = 0; j < var_ptr->ReaderCount; j++) { - unsigned int old_swizzle; - unsigned int new_swizzle; - struct rc_reader r = var_ptr->Readers[j]; - if (r.Inst->Type == - RC_INSTRUCTION_PAIR ) { - old_swizzle = r.U.P.Arg->Swizzle; - } else { - /* Source operands of TEX - * instructions can't be - * swizzle on r300/r400 GPUs. - */ - can_change_writemask = 0; - break; - } - new_swizzle = rc_rewrite_swizzle( - old_swizzle, conversion_swizzle); - if (!r300_swizzle_is_native_basic( - new_swizzle)) { - can_change_writemask = 0; - break; - } - } - if (!can_change_writemask) { - break; - } - } - if (!can_change_writemask) { - break; - } - } - } + for (j = 0; j < var_ptr->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = var_ptr->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + /* Source operands of TEX + * instructions can't be + * swizzle on r300/r400 GPUs. 
+ */ + can_change_writemask = 0; + break; + } + new_swizzle = rc_rewrite_swizzle(old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; + } + } + if (!can_change_writemask) { + break; + } + } + } - if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { - /* DDX/DDY seem to always fail when their writemasks are - * changed.*/ - if (is_derivative(variable->Inst->U.P.RGB.Opcode) - || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { - can_change_writemask = 0; - } - } - for ( ; readers; readers = readers->Next) { - struct rc_reader * r = readers->Item; - if (r->Inst->Type == RC_INSTRUCTION_PAIR) { - if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { - can_change_writemask = 0; - break; - } - /* DDX/DDY also fail when their swizzles are changed. */ - if (is_derivative(r->Inst->U.P.RGB.Opcode) - || is_derivative(r->Inst->U.P.Alpha.Opcode)) { - can_change_writemask = 0; - break; - } - } - } + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) || + is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for (; readers; readers = readers->Next) { + struct rc_reader *r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } - class_index = rc_find_class(classes, writemask, - can_change_writemask ? 3 : 1); + class_index = rc_find_class(classes, writemask, can_change_writemask ? 
3 : 1); done: - if (class_index > -1) { - return classes[class_index].ID; - } else { -error: - rc_error(variable->C, - "Could not find class for index=%u mask=%u\n", - variable->Dst.Index, writemask); - return 0; - } + if (class_index > -1) { + return classes[class_index].ID; + } else { + error: + rc_error(variable->C, "Could not find class for index=%u mask=%u\n", variable->Dst.Index, + writemask); + return 0; + } } -static void do_advanced_regalloc(struct regalloc_state * s) +static void +do_advanced_regalloc(struct regalloc_state *s) { - unsigned int i, input_node, node_count, node_index; - struct ra_class ** node_classes; - struct rc_instruction * inst; - struct rc_list * var_ptr; - struct rc_list * variables; - struct ra_graph * graph; - const struct rc_regalloc_state *ra_state = s->C->regalloc_state; + unsigned int i, input_node, node_count, node_index; + struct ra_class **node_classes; + struct rc_instruction *inst; + struct rc_list *var_ptr; + struct rc_list *variables; + struct ra_graph *graph; + const struct rc_regalloc_state *ra_state = s->C->regalloc_state; - /* Get list of program variables */ - variables = rc_get_variables(s->C); - node_count = rc_list_count(variables); - node_classes = memory_pool_malloc(&s->C->Pool, - node_count * sizeof(struct ra_class *)); + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, node_count * sizeof(struct ra_class *)); - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - unsigned int class_index; - /* Compute the live intervals */ - rc_variable_compute_live_intervals(var_ptr->Item); + for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); - class_index = variable_get_class(var_ptr->Item, ra_state->class_list); - 
node_classes[node_index] = ra_state->classes[class_index]; - } + class_index = variable_get_class(var_ptr->Item, ra_state->class_list); + node_classes[node_index] = ra_state->classes[class_index]; + } + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction *endloop = rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } - /* Calculate live intervals for input registers */ - for (inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_BGNLOOP) { - struct rc_instruction * endloop = - rc_match_bgnloop(inst); - if (endloop->IP > s->LoopEnd) { - s->LoopEnd = endloop->IP; - } - } - rc_for_all_reads_mask(inst, scan_read_callback, s); - } + /* Compute the writemask for inputs. */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + } - /* Compute the writemask for inputs. 
*/ - for (i = 0; i < s->NumInputs; i++) { - unsigned int chan, writemask = 0; - for (chan = 0; chan < 4; chan++) { - if (s->Input[i].Live[chan].Used) { - writemask |= (1 << chan); - } - } - s->Input[i].Writemask = writemask; - } + graph = ra_alloc_interference_graph(ra_state->regs, node_count + s->NumInputs); - graph = ra_alloc_interference_graph(ra_state->regs, - node_count + s->NumInputs); + for (node_index = 0; node_index < node_count; node_index++) { + ra_set_node_class(graph, node_index, node_classes[node_index]); + } - for (node_index = 0; node_index < node_count; node_index++) { - ra_set_node_class(graph, node_index, node_classes[node_index]); - } + rc_build_interference_graph(graph, variables); - rc_build_interference_graph(graph, variables); + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i < s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable *var = var_ptr->Item; + if (rc_overlap_live_intervals_array(s->Input[i].Live, var->Live)) { + ra_add_node_interference(graph, node_index, node_count + input_node); + } + } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, + get_reg_id(s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } - /* Add input registers to the interference graph */ - for (i = 0, input_node = 0; i< s->NumInputs; i++) { - if (!s->Input[i].Writemask) { - continue; - } - for (var_ptr = variables, node_index = 0; - var_ptr; var_ptr = var_ptr->Next, node_index++) { - struct rc_variable * var = var_ptr->Item; - if (rc_overlap_live_intervals_array(s->Input[i].Live, - var->Live)) { - ra_add_node_interference(graph, node_index, - node_count + input_node); - } - } - /* Manually allocate a register for this input */ - ra_set_node_reg(graph, node_count + input_node, get_reg_id( - s->Input[i].Index, s->Input[i].Writemask)); - 
input_node++; - } + if (!ra_allocate(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + ralloc_free(graph); + return; + } - if (!ra_allocate(graph)) { - rc_error(s->C, "Ran out of hardware temporaries\n"); - ralloc_free(graph); - return; - } + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable *var = var_ptr->Item; - /* Rewrite the registers */ - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - int reg = ra_get_node_reg(graph, node_index); - unsigned int writemask = reg_get_writemask(reg); - unsigned int index = reg_get_index(reg); - struct rc_variable * var = var_ptr->Item; + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } - if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { - writemask = rc_variable_writemask_sum(var); - } + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } - if (var->Dst.File == RC_FILE_INPUT) { - continue; - } - rc_variable_change_dst(var, index, writemask); - } - - ralloc_free(graph); + ralloc_free(graph); } /** @@ -366,41 +347,38 @@ static void do_advanced_regalloc(struct regalloc_state * s) * only allocates space for input registers (\sa do_regalloc_inputs_only). If * user is non-zero, then the regular register allocator will be used * (\sa do_regalloc). 
- */ -void rc_pair_regalloc(struct radeon_compiler *cc, void *user) + */ +void +rc_pair_regalloc(struct radeon_compiler *cc, void *user) { - struct r300_fragment_program_compiler *c = - (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; - int * do_full_regalloc = (int*)user; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc; + struct regalloc_state s; + int *do_full_regalloc = (int *)user; - memset(&s, 0, sizeof(s)); - s.C = cc; - s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; - s.Input = memory_pool_malloc(&cc->Pool, - s.NumInputs * sizeof(struct register_info)); - memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); - s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; - s.Temporary = memory_pool_malloc(&cc->Pool, - s.NumTemporaries * sizeof(struct register_info)); - memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); - rc_recompute_ips(s.C); + rc_recompute_ips(s.C); - c->AllocateHwInputs(c, &alloc_input_simple, &s); - if (*do_full_regalloc) { - do_advanced_regalloc(&s); - } else { - s.Simple = 1; - do_regalloc_inputs_only(&s); - } + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } - /* Rewrite inputs and if we are doing the simple allocation, rewrite - * temporaries too. 
*/ - for (struct rc_instruction *inst = s.C->Program.Instructions.Next; - inst != &s.C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, &s); - } + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c index d0d0ce8dad6..8b61a83c783 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -17,55 +17,58 @@ #define VERBOSE 0 -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) +#define DBG(...) \ + do { \ + if (VERBOSE) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) struct schedule_instruction { - struct rc_instruction * Instruction; + struct rc_instruction *Instruction; - /** Next instruction in the linked list of ready instructions. */ - struct schedule_instruction *NextReady; + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; - /** Values that this instruction reads and writes */ - struct reg_value * WriteValues[4]; - struct reg_value * ReadValues[12]; - unsigned int NumWriteValues:3; - unsigned int NumReadValues:4; + /** Values that this instruction reads and writes */ + struct reg_value *WriteValues[4]; + struct reg_value *ReadValues[12]; + unsigned int NumWriteValues : 3; + unsigned int NumReadValues : 4; - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. - */ - unsigned int NumDependencies:5; + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. 
+ */ + unsigned int NumDependencies : 5; - /** List of all readers (see rc_get_readers() for the definition of - * "all readers"), even those outside the basic block this instruction - * lives in. */ - struct rc_reader_data GlobalReaders; + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; - /** If the scheduler has paired an RGB and an Alpha instruction together, - * PairedInst references the alpha instruction's dependency information. - */ - struct schedule_instruction * PairedInst; + /** If the scheduler has paired an RGB and an Alpha instruction together, + * PairedInst references the alpha instruction's dependency information. + */ + struct schedule_instruction *PairedInst; - /** This scheduler uses the value of Score to determine which - * instruction to schedule. Instructions with a higher value of Score - * will be scheduled first. */ - int Score; + /** This scheduler uses the value of Score to determine which + * instruction to schedule. Instructions with a higher value of Score + * will be scheduled first. */ + int Score; - /** The number of components that read from a TEX instruction. */ - unsigned TexReadCount; + /** The number of components that read from a TEX instruction. */ + unsigned TexReadCount; - /** For TEX instructions a list of readers */ - struct rc_list * TexReaders; + /** For TEX instructions a list of readers */ + struct rc_list *TexReaders; }; - /** * Used to keep track of which instructions read a value. */ struct reg_value_reader { - struct schedule_instruction *Reader; - struct reg_value_reader *Next; + struct schedule_instruction *Reader; + struct reg_value_reader *Next; }; /** @@ -73,178 +76,182 @@ struct reg_value_reader { * RC_FILE_TEMPORARY. 
*/ struct reg_value { - struct schedule_instruction * Writer; + struct schedule_instruction *Writer; - /** - * Unordered linked list of instructions that read from this value. - * When this value becomes available, we increase all readers' - * dependency count. - */ - struct reg_value_reader *Readers; + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; - /** - * Number of readers of this value. This is decremented each time - * a reader of the value is committed. - * When the reader count reaches zero, the dependency count - * of the instruction writing \ref Next is decremented. - */ - unsigned int NumReaders; + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader count reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; - struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ }; struct register_state { - struct reg_value * Values[4]; + struct reg_value *Values[4]; }; struct remap_reg { - struct rc_instruction * Inst; - unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); - unsigned int OldSwizzle:3; - unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); - unsigned int NewSwizzle:3; - unsigned int OnlyTexReads:1; - struct remap_reg * Next; + struct rc_instruction *Inst; + unsigned int OldIndex : (RC_REGISTER_INDEX_BITS + 1); + unsigned int OldSwizzle : 3; + unsigned int NewIndex : (RC_REGISTER_INDEX_BITS + 1); + unsigned int NewSwizzle : 3; + unsigned int OnlyTexReads : 1; + struct remap_reg *Next; }; struct schedule_state { - struct radeon_compiler * C; - struct schedule_instruction * Current; - /** Array of the previous writers of Current's 
destination register - * indexed by channel. */ - struct schedule_instruction * PrevWriter[4]; + struct radeon_compiler *C; + struct schedule_instruction *Current; + /** Array of the previous writers of Current's destination register + * indexed by channel. */ + struct schedule_instruction *PrevWriter[4]; - struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; - /** - * Linked lists of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - /*@{*/ - struct schedule_instruction *ReadyFullALU; - struct schedule_instruction *ReadyRGB; - struct schedule_instruction *ReadyAlpha; - struct schedule_instruction *ReadyTEX; - /*@}*/ - struct rc_list *PendingTEX; + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ + struct rc_list *PendingTEX; - void (*CalcScore)(struct schedule_instruction *); - long max_tex_group; - unsigned PrevBlockHasTex:1; - unsigned PrevBlockHasKil:1; - unsigned TEXCount; - unsigned Opt:1; + void (*CalcScore)(struct schedule_instruction *); + long max_tex_group; + unsigned PrevBlockHasTex : 1; + unsigned PrevBlockHasKil : 1; + unsigned TEXCount; + unsigned Opt : 1; }; -static struct reg_value ** get_reg_valuep(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) +static struct reg_value ** +get_reg_valuep(struct schedule_state *s, rc_register_file file, unsigned int index, + unsigned int chan) { - if (file != RC_FILE_TEMPORARY) - return NULL; + if (file != RC_FILE_TEMPORARY) + return NULL; - if (index >= RC_REGISTER_MAX_INDEX) { - rc_error(s->C, "%s: index %i out of bounds\n", __func__, index); - return NULL; - } + if (index >= RC_REGISTER_MAX_INDEX) 
{ + rc_error(s->C, "%s: index %i out of bounds\n", __func__, index); + return NULL; + } - return &s->Temporary[index].Values[chan]; + return &s->Temporary[index].Values[chan]; } -static unsigned get_tex_read_count(struct schedule_instruction * sinst) +static unsigned +get_tex_read_count(struct schedule_instruction *sinst) { - unsigned tex_read_count = sinst->TexReadCount; - if (sinst->PairedInst) { - tex_read_count += sinst->PairedInst->TexReadCount; - } - return tex_read_count; + unsigned tex_read_count = sinst->TexReadCount; + if (sinst->PairedInst) { + tex_read_count += sinst->PairedInst->TexReadCount; + } + return tex_read_count; } #if VERBOSE -static void print_list(struct schedule_instruction * sinst) +static void +print_list(struct schedule_instruction *sinst) { - struct schedule_instruction * ptr; - for (ptr = sinst; ptr; ptr=ptr->NextReady) { - unsigned tex_read_count = get_tex_read_count(ptr); - unsigned score = sinst->Score; - fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score, - tex_read_count); - } - fprintf(stderr, "\n"); + struct schedule_instruction *ptr; + for (ptr = sinst; ptr; ptr = ptr->NextReady) { + unsigned tex_read_count = get_tex_read_count(ptr); + unsigned score = sinst->Score; + fprintf(stderr, "%u (%d) [%u],", ptr->Instruction->IP, score, tex_read_count); + } + fprintf(stderr, "\n"); } #endif -static void remove_inst_from_list(struct schedule_instruction ** list, - struct schedule_instruction * inst) +static void +remove_inst_from_list(struct schedule_instruction **list, struct schedule_instruction *inst) { - struct schedule_instruction * prev = NULL; - struct schedule_instruction * list_ptr; - for (list_ptr = *list; list_ptr; prev = list_ptr, - list_ptr = list_ptr->NextReady) { - if (list_ptr == inst) { - if (prev) { - prev->NextReady = inst->NextReady; - } else { - *list = inst->NextReady; - } - inst->NextReady = NULL; - break; - } - } + struct schedule_instruction *prev = NULL; + struct schedule_instruction *list_ptr; + for 
(list_ptr = *list; list_ptr; prev = list_ptr, list_ptr = list_ptr->NextReady) { + if (list_ptr == inst) { + if (prev) { + prev->NextReady = inst->NextReady; + } else { + *list = inst->NextReady; + } + inst->NextReady = NULL; + break; + } + } } -static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +static void +add_inst_to_list(struct schedule_instruction **list, struct schedule_instruction *inst) { - inst->NextReady = *list; - *list = inst; + inst->NextReady = *list; + *list = inst; } -static void add_inst_to_list_score(struct schedule_instruction ** list, - struct schedule_instruction * inst) +static void +add_inst_to_list_score(struct schedule_instruction **list, struct schedule_instruction *inst) { - struct schedule_instruction * temp; - struct schedule_instruction * prev; - if (!*list) { - *list = inst; - return; - } - temp = *list; - prev = NULL; - while(temp && inst->Score <= temp->Score) { - prev = temp; - temp = temp->NextReady; - } + struct schedule_instruction *temp; + struct schedule_instruction *prev; + if (!*list) { + *list = inst; + return; + } + temp = *list; + prev = NULL; + while (temp && inst->Score <= temp->Score) { + prev = temp; + temp = temp->NextReady; + } - if (!prev) { - inst->NextReady = temp; - *list = inst; - } else { - prev->NextReady = inst; - inst->NextReady = temp; - } + if (!prev) { + inst->NextReady = temp; + *list = inst; + } else { + prev->NextReady = inst; + inst->NextReady = temp; + } } -static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +static void +instruction_ready(struct schedule_state *s, struct schedule_instruction *sinst) { - DBG("%i is now ready\n", sinst->Instruction->IP); + DBG("%i is now ready\n", sinst->Instruction->IP); - /* Adding Ready TEX instructions to the end of the "Ready List" helps - * us emit TEX instructions in blocks without losing our place. 
*/ - if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) - add_inst_to_list_score(&s->ReadyTEX, sinst); - else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) - add_inst_to_list_score(&s->ReadyRGB, sinst); - else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) - add_inst_to_list_score(&s->ReadyAlpha, sinst); - else - add_inst_to_list_score(&s->ReadyFullALU, sinst); + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. */ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list_score(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list_score(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list_score(&s->ReadyAlpha, sinst); + else + add_inst_to_list_score(&s->ReadyFullALU, sinst); } -static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +static void +decrease_dependencies(struct schedule_state *s, struct schedule_instruction *sinst) { - assert(sinst->NumDependencies > 0); - sinst->NumDependencies--; - if (!sinst->NumDependencies) - instruction_ready(s, sinst); + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); } /* These functions provide different heuristics for scheduling instructions. @@ -279,137 +286,141 @@ static void calc_score_deps(struct schedule_instruction * sinst) #define NO_OUTPUT_SCORE (1 << 24) -static void score_no_output(struct schedule_instruction * sinst) +static void +score_no_output(struct schedule_instruction *sinst) { - assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); - if (!sinst->Instruction->U.P.RGB.OutputWriteMask && - !sinst->Instruction->U.P.Alpha.OutputWriteMask) { - if (sinst->PairedInst) { - if (!sinst->PairedInst->Instruction->U.P. 
- RGB.OutputWriteMask - && !sinst->PairedInst->Instruction->U.P. - Alpha.OutputWriteMask) { - sinst->Score |= NO_OUTPUT_SCORE; - } + assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); + if (!sinst->Instruction->U.P.RGB.OutputWriteMask && + !sinst->Instruction->U.P.Alpha.OutputWriteMask) { + if (sinst->PairedInst) { + if (!sinst->PairedInst->Instruction->U.P.RGB.OutputWriteMask && + !sinst->PairedInst->Instruction->U.P.Alpha.OutputWriteMask) { + sinst->Score |= NO_OUTPUT_SCORE; + } - } else { - sinst->Score |= NO_OUTPUT_SCORE; - } - } + } else { + sinst->Score |= NO_OUTPUT_SCORE; + } + } } #define PAIRED_SCORE (1 << 16) -static void calc_score_r300(struct schedule_instruction * sinst) +static void +calc_score_r300(struct schedule_instruction *sinst) { - unsigned src_idx; + unsigned src_idx; - if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { - sinst->Score = 0; - return; - } + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { + sinst->Score = 0; + return; + } - score_no_output(sinst); + score_no_output(sinst); - if (sinst->PairedInst) { - sinst->Score |= PAIRED_SCORE; - return; - } + if (sinst->PairedInst) { + sinst->Score |= PAIRED_SCORE; + return; + } - for (src_idx = 0; src_idx < 4; src_idx++) { - sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + - sinst->Instruction->U.P.Alpha.Src[src_idx].Used; - } + for (src_idx = 0; src_idx < 4; src_idx++) { + sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + + sinst->Instruction->U.P.Alpha.Src[src_idx].Used; + } } #define NO_READ_TEX_SCORE (1 << 16) -static void calc_score_readers(struct schedule_instruction * sinst) +static void +calc_score_readers(struct schedule_instruction *sinst) { - if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { - sinst->Score = 0; - } else { - sinst->Score = sinst->NumReadValues; - if (sinst->PairedInst) { - sinst->Score += sinst->PairedInst->NumReadValues; - } - if (get_tex_read_count(sinst) == 0) { - sinst->Score |= NO_READ_TEX_SCORE; - } - 
score_no_output(sinst); - } + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { + sinst->Score = 0; + } else { + sinst->Score = sinst->NumReadValues; + if (sinst->PairedInst) { + sinst->Score += sinst->PairedInst->NumReadValues; + } + if (get_tex_read_count(sinst) == 0) { + sinst->Score |= NO_READ_TEX_SCORE; + } + score_no_output(sinst); + } } /** * This function decreases the dependencies of the next instruction that * wants to write to each of sinst's read values. */ -static void commit_update_reads(struct schedule_state * s, - struct schedule_instruction * sinst){ - do { - for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { - struct reg_value * v = sinst->ReadValues[i]; - assert(v->NumReaders > 0); - v->NumReaders--; - if (!v->NumReaders) { - if (v->Next) { - decrease_dependencies(s, v->Next->Writer); - } - } - } - } while ((sinst = sinst->PairedInst)); -} - -static void commit_update_writes(struct schedule_state * s, - struct schedule_instruction * sinst){ - do { - for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { - struct reg_value * v = sinst->WriteValues[i]; - if (v->NumReaders) { - for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { - decrease_dependencies(s, r->Reader); - } - } else { - /* This happens in instruction sequences of the type - * OP r.x, ...; - * OP r.x, r.x, ...; - * See also the subtlety in how instructions that both - * read and write the same register are scanned. 
- */ - if (v->Next) - decrease_dependencies(s, v->Next->Writer); - } - } - } while ((sinst = sinst->PairedInst)); -} - -static void notify_sem_wait(struct schedule_state *s) +static void +commit_update_reads(struct schedule_state *s, struct schedule_instruction *sinst) { - struct rc_list * pend_ptr; - for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { - struct rc_list * read_ptr; - struct schedule_instruction * pending = pend_ptr->Item; - for (read_ptr = pending->TexReaders; read_ptr; - read_ptr = read_ptr->Next) { - struct schedule_instruction * reader = read_ptr->Item; - reader->TexReadCount--; - } - } - s->PendingTEX = NULL; + do { + for (unsigned int i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value *v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) { + decrease_dependencies(s, v->Next->Writer); + } + } + } + } while ((sinst = sinst->PairedInst)); } -static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +static void +commit_update_writes(struct schedule_state *s, struct schedule_instruction *sinst) { - DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); + do { + for (unsigned int i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value *v = sinst->WriteValues[i]; + if (v->NumReaders) { + for (struct reg_value_reader *r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. 
+ */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } + } while ((sinst = sinst->PairedInst)); +} - commit_update_reads(s, sinst); +static void +notify_sem_wait(struct schedule_state *s) +{ + struct rc_list *pend_ptr; + for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { + struct rc_list *read_ptr; + struct schedule_instruction *pending = pend_ptr->Item; + for (read_ptr = pending->TexReaders; read_ptr; read_ptr = read_ptr->Next) { + struct schedule_instruction *reader = read_ptr->Item; + reader->TexReadCount--; + } + } + s->PendingTEX = NULL; +} - commit_update_writes(s, sinst); +static void +commit_alu_instruction(struct schedule_state *s, struct schedule_instruction *sinst) +{ + DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); - if (get_tex_read_count(sinst) > 0) { - sinst->Instruction->U.P.SemWait = 1; - notify_sem_wait(s); - } + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); + + if (get_tex_read_count(sinst) > 0) { + sinst->Instruction->U.P.SemWait = 1; + notify_sem_wait(s); + } } /** @@ -418,52 +429,53 @@ static void commit_alu_instruction(struct schedule_state * s, struct schedule_in * Emit as a single block to (hopefully) sample many textures in parallel, * and to avoid hardware indirections on R300. 
*/ -static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +static void +emit_all_tex(struct schedule_state *s, struct rc_instruction *before) { - struct schedule_instruction *readytex; - struct rc_instruction * inst_begin; + struct schedule_instruction *readytex; + struct rc_instruction *inst_begin; - assert(s->ReadyTEX); - notify_sem_wait(s); + assert(s->ReadyTEX); + notify_sem_wait(s); - /* Node marker for R300 */ - inst_begin = rc_insert_new_instruction(s->C, before->Prev); - inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + /* Node marker for R300 */ + inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; - /* Link texture instructions back in */ - readytex = s->ReadyTEX; - while(readytex) { - rc_insert_instruction(before->Prev, readytex->Instruction); - DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + /* Link texture instructions back in */ + readytex = s->ReadyTEX; + while (readytex) { + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); - /* All of the TEX instructions in the same TEX block have - * their source registers read from before any of the - * instructions in that block write to their destination - * registers. This means that when we commit a TEX - * instruction, any other TEX instruction that wants to write - * to one of the committed instruction's source register can be - * marked as ready and should be emitted in the same TEX - * block. 
This prevents the following sequence from being - * emitted in two different TEX blocks: - * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; - * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; - */ - commit_update_reads(s, readytex); - readytex = readytex->NextReady; - } - readytex = s->ReadyTEX; - s->ReadyTEX = NULL; - while(readytex){ - DBG("%i: commit TEX writes\n", readytex->Instruction->IP); - commit_update_writes(s, readytex); - /* Set semaphore bits for last TEX instruction in the block */ - if (!readytex->NextReady) { - readytex->Instruction->U.I.TexSemAcquire = 1; - readytex->Instruction->U.I.TexSemWait = 1; - } - rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); - readytex = readytex->NextReady; - } + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = NULL; + while (readytex) { + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); + /* Set semaphore bits for last TEX instruction in the block */ + if (!readytex->NextReady) { + readytex->Instruction->U.I.TexSemAcquire = 1; + readytex->Instruction->U.I.TexSemWait = 1; + } + rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); + readytex = readytex->NextReady; + } } /* This is a helper function for destructive_merge_instructions(). 
It helps @@ -474,190 +486,182 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo * @return 0 if merging the presubtract sources fails. * @return 1 if merging the presubtract sources succeeds. */ -static int merge_presub_sources( - struct rc_pair_instruction * dst_full, - struct rc_pair_sub_instruction src, - unsigned int type) +static int +merge_presub_sources(struct rc_pair_instruction *dst_full, struct rc_pair_sub_instruction src, + unsigned int type) { - unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; - struct rc_pair_sub_instruction * dst_sub; - const struct rc_opcode_info * info; + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction *dst_sub; + const struct rc_opcode_info *info; - assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); - switch(type) { - case RC_SOURCE_RGB: - is_rgb = 1; - is_alpha = 0; - dst_sub = &dst_full->RGB; - break; - case RC_SOURCE_ALPHA: - is_rgb = 0; - is_alpha = 1; - dst_sub = &dst_full->Alpha; - break; - default: - assert(0); - return 0; - } + switch (type) { + case RC_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } - info = rc_get_opcode_info(dst_full->RGB.Opcode); + info = rc_get_opcode_info(dst_full->RGB.Opcode); - if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) - return 0; + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; - srcp_regs = rc_presubtract_src_reg_count( - src.Src[RC_PAIR_PRESUB_SRC].Index); - for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { - unsigned int arg; - int free_source; - unsigned int one_way = 0; - struct rc_pair_instruction_source srcp = src.Src[srcp_src]; - struct rc_pair_instruction_source temp; + srcp_regs = rc_presubtract_src_reg_count(src.Src[RC_PAIR_PRESUB_SRC].Index); + for (srcp_src = 0; srcp_src < srcp_regs; 
srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; - free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, - srcp.File, srcp.Index); + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, srcp.File, srcp.Index); - /* If free_source < 0 then there are no free source - * slots. */ - if (free_source < 0) - return 0; + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; - temp = dst_sub->Src[srcp_src]; - dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; - /* srcp needs src0 and src1 to be the same */ - if (free_source < srcp_src) { - if (!temp.Used) - continue; - free_source = rc_pair_alloc_source(dst_full, is_rgb, - is_alpha, temp.File, temp.Index); - if (free_source < 0) - return 0; - one_way = 1; - } else { - dst_sub->Src[free_source] = temp; - } + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } - /* If free_source == srcp_src, then the presubtract - * source is already in the correct place. */ - if (free_source == srcp_src) - continue; + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; - /* Shuffle the sources, so we can put the - * presubtract source in the correct place. */ - for(arg = 0; arg < info->NumSrcRegs; arg++) { - /* If the arg does read both from rgb and alpha, then we need to rewrite - * both sources and the code currently doesn't handle this. 
- * FIXME: This is definitely solvable, however shader-db shows it is - * not worth the effort. - */ - if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA && - rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB) - return 0; + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for (arg = 0; arg < info->NumSrcRegs; arg++) { + /* If the arg does read both from rgb and alpha, then we need to rewrite + * both sources and the code currently doesn't handle this. + * FIXME: This is definitely solvable, however shader-db shows it is + * not worth the effort. + */ + if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA && + rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB) + return 0; - /*If this arg does not read from an rgb source, - * do nothing. */ - if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) - & type)) { - continue; - } + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & type)) { + continue; + } - if (dst_full->RGB.Arg[arg].Source == srcp_src) - dst_full->RGB.Arg[arg].Source = free_source; - /* We need to do this just in case register - * is one of the sources already, but in the - * wrong spot. */ - else if(dst_full->RGB.Arg[arg].Source == free_source - && !one_way) { - dst_full->RGB.Arg[arg].Source = srcp_src; - } - } - } - return 1; + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. 
*/ + else if (dst_full->RGB.Arg[arg].Source == free_source && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; } - /* This function assumes that rgb.Alpha and alpha.RGB are unused */ -static int destructive_merge_instructions( - struct rc_pair_instruction * rgb, - struct rc_pair_instruction * alpha) +static int +destructive_merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha) { - const struct rc_opcode_info * opcode; + const struct rc_opcode_info *opcode; - assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); - assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); - /* Presubtract registers need to be merged first so that registers - * needed by the presubtract operation can be placed in src0 and/or - * src1. */ + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ - /* Merge the rgb presubtract registers. */ - if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { - return 0; - } - } - /* Merge the alpha presubtract registers */ - if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ - return 0; - } - } + /* Merge the rgb presubtract registers. 
*/ + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { + return 0; + } + } + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)) { + return 0; + } + } - /* Copy alpha args into rgb */ - opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + /* Copy alpha args into rgb */ + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; - rc_register_file file = 0; - unsigned int index = 0; - int source; + for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + int source; - if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { - srcrgb = 1; - file = alpha->RGB.Src[oldsrc].File; - index = alpha->RGB.Src[oldsrc].Index; - } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { - srcalpha = 1; - file = alpha->Alpha.Src[oldsrc].File; - index = alpha->Alpha.Src[oldsrc].Index; - } + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } - source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); - if (source < 0) - return 0; + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; - rgb->Alpha.Arg[arg].Source = source; - rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; - rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; - rgb->Alpha.Arg[arg].Negate = 
alpha->Alpha.Arg[arg].Negate; - } + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } - /* Copy alpha opcode into rgb */ - rgb->Alpha.Opcode = alpha->Alpha.Opcode; - rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; - rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; - rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; - rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; - rgb->Alpha.Saturate = alpha->Alpha.Saturate; - rgb->Alpha.Omod = alpha->Alpha.Omod; + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + rgb->Alpha.Omod = alpha->Alpha.Omod; - /* Merge ALU result writing */ - if (alpha->WriteALUResult) { - if (rgb->WriteALUResult) - return 0; + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; - rgb->WriteALUResult = alpha->WriteALUResult; - rgb->ALUResultCompare = alpha->ALUResultCompare; - } + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } - /* Copy SemWait */ - rgb->SemWait |= alpha->SemWait; + /* Copy SemWait */ + rgb->SemWait |= alpha->SemWait; - return 1; + return 1; } /** @@ -666,697 +670,673 @@ static int destructive_merge_instructions( * Return true on success; on failure, return false, and keep * the instructions untouched. 
*/ -static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +static int +merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha) { - struct rc_pair_instruction backup; + struct rc_pair_instruction backup; - /*Instructions can't write output registers and ALU result at the - * same time. */ - if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) - || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { - return 0; - } + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) || + (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } - /* Writing output registers in the middle of shaders is slow, so - * we don't want to pair output writes with temp writes. */ - if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) - || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { - return 0; - } + /* Writing output registers in the middle of shaders is slow, so + * we don't want to pair output writes with temp writes. */ + if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) || + (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { + return 0; + } - memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); - if (destructive_merge_instructions(rgb, alpha)) - return 1; + if (destructive_merge_instructions(rgb, alpha)) + return 1; - memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); - return 0; + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; } -static void presub_nop(struct rc_instruction * emitted) { - int prev_rgb_index, prev_alpha_index, i, num_src; +static void +presub_nop(struct rc_instruction *emitted) +{ + int prev_rgb_index, prev_alpha_index, i, num_src; - /* We don't need a nop if the previous instruction is a TEX. 
*/ - if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { - return; - } - if (emitted->Prev->U.P.RGB.WriteMask) - prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; - else - prev_rgb_index = -1; - if (emitted->Prev->U.P.Alpha.WriteMask) - prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; - else - prev_alpha_index = 1; + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = 1; - /* Check the previous rgb instruction */ - if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - num_src = rc_presubtract_src_reg_count( - emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); - for (i = 0; i < num_src; i++) { - unsigned int index = emitted->U.P.RGB.Src[i].Index; - if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY - && (index == prev_rgb_index - || index == prev_alpha_index)) { - emitted->Prev->U.P.Nop = 1; - return; - } - } - } + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count(emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY && + (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } - /* Check the previous alpha instruction. */ - if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - return; + /* Check the previous alpha instruction. 
*/ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; - num_src = rc_presubtract_src_reg_count( - emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); - for (i = 0; i < num_src; i++) { - unsigned int index = emitted->U.P.Alpha.Src[i].Index; - if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY - && (index == prev_rgb_index || index == prev_alpha_index)) { - emitted->Prev->U.P.Nop = 1; - return; - } - } + num_src = rc_presubtract_src_reg_count(emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if (emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY && + (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } } -static void rgb_to_alpha_remap ( - struct schedule_state * s, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - rc_register_file old_file, - rc_swizzle old_swz, - unsigned int new_index) +static void +rgb_to_alpha_remap(struct schedule_state *s, struct rc_instruction *inst, + struct rc_pair_instruction_arg *arg, rc_register_file old_file, + rc_swizzle old_swz, unsigned int new_index) { - int new_src_index; - unsigned int i; + int new_src_index; + unsigned int i; - for (i = 0; i < 3; i++) { - if (get_swz(arg->Swizzle, i) == old_swz) { - SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); - } - } - new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, - old_file, new_index); - /* This conversion is not possible, we must have made a mistake in - * is_rgb_to_alpha_possible. */ - if (new_src_index < 0) { - rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n"); - return; - } + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. 
*/ + if (new_src_index < 0) { + rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n"); + return; + } - arg->Source = new_src_index; + arg->Source = new_src_index; } -static int can_remap(unsigned int opcode) +static int +can_remap(unsigned int opcode) { - switch(opcode) { - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - return 0; - default: - return 1; - } + switch (opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } } -static int can_convert_opcode_to_alpha(unsigned int opcode) +static int +can_convert_opcode_to_alpha(unsigned int opcode) { - switch(opcode) { - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - case RC_OPCODE_DP2: - case RC_OPCODE_DP3: - case RC_OPCODE_DP4: - return 0; - default: - return 1; - } + switch (opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + return 0; + default: + return 1; + } } -static void is_rgb_to_alpha_possible( - void * userdata, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) +static void +is_rgb_to_alpha_possible(void *userdata, struct rc_instruction *inst, + struct rc_pair_instruction_arg *arg, + struct rc_pair_instruction_source *src) { - unsigned int read_chan = RC_SWIZZLE_UNUSED; - unsigned int alpha_sources = 0; - unsigned int i; - struct rc_reader_data * reader_data = userdata; + unsigned int read_chan = RC_SWIZZLE_UNUSED; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data *reader_data = userdata; - if (!can_remap(inst->U.P.RGB.Opcode) - || !can_remap(inst->U.P.Alpha.Opcode)) { - reader_data->Abort = 1; - return; - } + if (!can_remap(inst->U.P.RGB.Opcode) || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } - if (!src) - return; + if (!src) + return; - /* XXX There are some cases where we can still do the conversion if - * a reader reads from a presubtract source, but for now we'll prevent - 
* it. */ - if (arg->Source == RC_PAIR_PRESUB_SRC) { - reader_data->Abort = 1; - return; - } + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } - /* Make sure the source only reads the register component that we - * are going to be converting from. It is OK if the instruction uses - * this component more than once. - * XXX If the index we will be converting to is the same as the - * current index, then it is OK to read from more than one component. - */ - for (i = 0; i < 3; i++) { - rc_swizzle swz = get_swz(arg->Swizzle, i); - switch(swz) { - case RC_SWIZZLE_X: - case RC_SWIZZLE_Y: - case RC_SWIZZLE_Z: - case RC_SWIZZLE_W: - if (read_chan == RC_SWIZZLE_UNUSED) { - read_chan = swz; - } else if (read_chan != swz) { - reader_data->Abort = 1; - return; - } - break; - default: - break; - } - } + /* Make sure the source only reads the register component that we + * are going to be converting from. It is OK if the instruction uses + * this component more than once. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch (swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + if (read_chan == RC_SWIZZLE_UNUSED) { + read_chan = swz; + } else if (read_chan != swz) { + reader_data->Abort = 1; + return; + } + break; + default: + break; + } + } - /* Make sure there are enough alpha sources. 
- * XXX If we know what register all the readers are going - * to be remapped to, then in some situations we can still do - * the substitution, even if all 3 alpha sources are being used.*/ - for (i = 0; i < 3; i++) { - if (inst->U.P.Alpha.Src[i].Used) { - alpha_sources++; - } - } - if (alpha_sources > 2) { - reader_data->Abort = 1; - return; - } + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the substitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } } -static int convert_rgb_to_alpha( - struct schedule_state * s, - struct schedule_instruction * sched_inst) +static int +convert_rgb_to_alpha(struct schedule_state *s, struct schedule_instruction *sched_inst) { - struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; - unsigned int old_mask = pair_inst->RGB.WriteMask; - unsigned int old_swz = rc_mask_to_swizzle(old_mask); - const struct rc_opcode_info * info = - rc_get_opcode_info(pair_inst->RGB.Opcode); - int new_index = -1; - unsigned int i; + struct rc_pair_instruction *pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info *info = rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; - if (sched_inst->GlobalReaders.Abort) - return 0; + if (sched_inst->GlobalReaders.Abort) + return 0; - /* Even though we checked that we can convert to alpha previously, it is - * possible that another rgb source of the reader instructions was already - * converted to alpha and we thus have no longer free alpha sources. 
- */ - for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { - struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; - if (reader.Inst->U.P.Alpha.Src[2].Used) - return 0; - } + /* Even though we checked that we can convert to alpha previously, it is + * possible that another rgb source of the reader instructions was already + * converted to alpha and we thus have no longer free alpha sources. + */ + for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + if (reader.Inst->U.P.Alpha.Src[2].Used) + return 0; + } - if (!pair_inst->RGB.WriteMask) - return 0; + if (!pair_inst->RGB.WriteMask) + return 0; - if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) - || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { - return 0; - } + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) || + !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } - assert(sched_inst->NumWriteValues == 1); + assert(sched_inst->NumWriteValues == 1); - if (!sched_inst->WriteValues[0]) { - assert(0); - return 0; - } + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } - /* We start at the old index, because if we can reuse the same - * register and just change the swizzle then it is more likely we - * will be able to convert all the readers. */ - for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { - struct reg_value ** new_regvalp = get_reg_valuep( - s, RC_FILE_TEMPORARY, i, 3); - if (!*new_regvalp) { - struct reg_value ** old_regvalp = - get_reg_valuep(s, - RC_FILE_TEMPORARY, - pair_inst->RGB.DestIndex, - rc_mask_to_swizzle(old_mask)); - new_index = i; - *new_regvalp = *old_regvalp; - break; - } - } - if (new_index < 0) { - return 0; - } + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. 
*/ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value **new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value **old_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + break; + } + } + if (new_index < 0) { + return 0; + } - /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA - * as the RGB opcode, then the Alpha instruction will already contain - * the correct opcode and instruction args, so we do not want to - * overwrite them. - */ - if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { - pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; - memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, - sizeof(pair_inst->Alpha.Arg)); - } - pair_inst->Alpha.DestIndex = new_index; - pair_inst->Alpha.WriteMask = RC_MASK_W; - pair_inst->Alpha.Target = pair_inst->RGB.Target; - pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; - pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; - pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; - pair_inst->Alpha.Omod = pair_inst->RGB.Omod; - /* Move the swizzles into the first chan */ - for (i = 0; i < info->NumSrcRegs; i++) { - unsigned int j; - for (j = 0; j < 3; j++) { - unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); - if (swz != RC_SWIZZLE_UNUSED) { - pair_inst->Alpha.Arg[i].Swizzle = - rc_init_swizzle(swz, 1); - break; - } - } - } - pair_inst->RGB.Opcode = RC_OPCODE_NOP; - pair_inst->RGB.DestIndex = 0; - pair_inst->RGB.WriteMask = 0; - pair_inst->RGB.Target = 0; - pair_inst->RGB.OutputWriteMask = 0; - pair_inst->RGB.DepthWriteMask = 0; - pair_inst->RGB.Saturate = 0; - memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA + * as the RGB opcode, then the Alpha instruction will already contain + * the correct opcode 
and instruction args, so we do not want to + * overwrite them. + */ + if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, sizeof(pair_inst->Alpha.Arg)); + } + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = RC_MASK_W; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + pair_inst->Alpha.Omod = pair_inst->RGB.Omod; + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); - for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { - struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; - rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg, - RC_FILE_TEMPORARY, old_swz, new_index); - } - return 1; + for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg, RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; } -static void try_convert_and_pair( - struct schedule_state *s, - struct schedule_instruction ** inst_list) +static void +try_convert_and_pair(struct schedule_state *s, struct schedule_instruction **inst_list) { - struct schedule_instruction * list_ptr = *inst_list; - 
while (list_ptr && *inst_list && (*inst_list)->NextReady) { - int paired = 0; - if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP - && list_ptr->Instruction->U.P.RGB.Opcode - != RC_OPCODE_REPL_ALPHA) { - goto next; - } - if (list_ptr->NumWriteValues == 1 - && convert_rgb_to_alpha(s, list_ptr)) { + struct schedule_instruction *list_ptr = *inst_list; + while (list_ptr && *inst_list && (*inst_list)->NextReady) { + int paired = 0; + if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP && + list_ptr->Instruction->U.P.RGB.Opcode != RC_OPCODE_REPL_ALPHA) { + goto next; + } + if (list_ptr->NumWriteValues == 1 && convert_rgb_to_alpha(s, list_ptr)) { - struct schedule_instruction * pair_ptr; - remove_inst_from_list(inst_list, list_ptr); - add_inst_to_list_score(&s->ReadyAlpha, list_ptr); + struct schedule_instruction *pair_ptr; + remove_inst_from_list(inst_list, list_ptr); + add_inst_to_list_score(&s->ReadyAlpha, list_ptr); - for (pair_ptr = s->ReadyRGB; pair_ptr; - pair_ptr = pair_ptr->NextReady) { - if (merge_instructions(&pair_ptr->Instruction->U.P, - &list_ptr->Instruction->U.P)) { - remove_inst_from_list(&s->ReadyAlpha, list_ptr); - remove_inst_from_list(&s->ReadyRGB, pair_ptr); - pair_ptr->PairedInst = list_ptr; + for (pair_ptr = s->ReadyRGB; pair_ptr; pair_ptr = pair_ptr->NextReady) { + if (merge_instructions(&pair_ptr->Instruction->U.P, &list_ptr->Instruction->U.P)) { + remove_inst_from_list(&s->ReadyAlpha, list_ptr); + remove_inst_from_list(&s->ReadyRGB, pair_ptr); + pair_ptr->PairedInst = list_ptr; - add_inst_to_list(&s->ReadyFullALU, pair_ptr); - list_ptr = *inst_list; - paired = 1; - break; - } - - } - } - if (!paired) { -next: - list_ptr = list_ptr->NextReady; - } - } + add_inst_to_list(&s->ReadyFullALU, pair_ptr); + list_ptr = *inst_list; + paired = 1; + break; + } + } + } + if (!paired) { + next: + list_ptr = list_ptr->NextReady; + } + } } /** * This function attempts to merge RGB and Alpha instructions together. 
*/ -static void pair_instructions(struct schedule_state * s) +static void +pair_instructions(struct schedule_state *s) { - struct schedule_instruction *rgb_ptr; - struct schedule_instruction *alpha_ptr; + struct schedule_instruction *rgb_ptr; + struct schedule_instruction *alpha_ptr; - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - rgb_ptr = s->ReadyRGB; - while(rgb_ptr) { - struct schedule_instruction * rgb_next = rgb_ptr->NextReady; - alpha_ptr = s->ReadyAlpha; - while(alpha_ptr) { - struct schedule_instruction * alpha_next = alpha_ptr->NextReady; - if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { - /* Remove RGB and Alpha from their ready lists. - */ - remove_inst_from_list(&s->ReadyRGB, rgb_ptr); - remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); - rgb_ptr->PairedInst = alpha_ptr; - add_inst_to_list(&s->ReadyFullALU, rgb_ptr); - break; - } - alpha_ptr = alpha_next; - } - rgb_ptr = rgb_next; - } + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + rgb_ptr = s->ReadyRGB; + while (rgb_ptr) { + struct schedule_instruction *rgb_next = rgb_ptr->NextReady; + alpha_ptr = s->ReadyAlpha; + while (alpha_ptr) { + struct schedule_instruction *alpha_next = alpha_ptr->NextReady; + if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { + /* Remove RGB and Alpha from their ready lists. + */ + remove_inst_from_list(&s->ReadyRGB, rgb_ptr); + remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); + rgb_ptr->PairedInst = alpha_ptr; + add_inst_to_list(&s->ReadyFullALU, rgb_ptr); + break; + } + alpha_ptr = alpha_next; + } + rgb_ptr = rgb_next; + } - if (!s->Opt) { - return; - } + if (!s->Opt) { + return; + } - /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB - * slot can be converted into Alpha instructions. 
*/ - try_convert_and_pair(s, &s->ReadyFullALU); + /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB + * slot can be converted into Alpha instructions. */ + try_convert_and_pair(s, &s->ReadyFullALU); - /* Try to convert some of the RGB instructions to Alpha and - * try to pair it with another RGB. */ - try_convert_and_pair(s, &s->ReadyRGB); + /* Try to convert some of the RGB instructions to Alpha and + * try to pair it with another RGB. */ + try_convert_and_pair(s, &s->ReadyRGB); } -static void update_max_score( - struct schedule_state * s, - struct schedule_instruction ** list, - int * max_score, - struct schedule_instruction ** max_inst_out, - struct schedule_instruction *** list_out) +static void +update_max_score(struct schedule_state *s, struct schedule_instruction **list, int *max_score, + struct schedule_instruction **max_inst_out, + struct schedule_instruction ***list_out) { - struct schedule_instruction * list_ptr; - for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { - int score; - s->CalcScore(list_ptr); - score = list_ptr->Score; - if (!*max_inst_out || score > *max_score) { - *max_score = score; - *max_inst_out = list_ptr; - *list_out = list; - } - } + struct schedule_instruction *list_ptr; + for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { + int score; + s->CalcScore(list_ptr); + score = list_ptr->Score; + if (!*max_inst_out || score > *max_score) { + *max_score = score; + *max_inst_out = list_ptr; + *list_out = list; + } + } } -static void emit_instruction( - struct schedule_state * s, - struct rc_instruction * before) +static void +emit_instruction(struct schedule_state *s, struct rc_instruction *before) { - int max_score = -1; - struct schedule_instruction * max_inst = NULL; - struct schedule_instruction ** max_list = NULL; - unsigned tex_count = 0; - struct schedule_instruction * tex_ptr; + int max_score = -1; + struct schedule_instruction *max_inst = NULL; + struct schedule_instruction **max_list = 
NULL; + unsigned tex_count = 0; + struct schedule_instruction *tex_ptr; - pair_instructions(s); + pair_instructions(s); #if VERBOSE - fprintf(stderr, "Full:\n"); - print_list(s->ReadyFullALU); - fprintf(stderr, "RGB:\n"); - print_list(s->ReadyRGB); - fprintf(stderr, "Alpha:\n"); - print_list(s->ReadyAlpha); - fprintf(stderr, "TEX:\n"); - print_list(s->ReadyTEX); + fprintf(stderr, "Full:\n"); + print_list(s->ReadyFullALU); + fprintf(stderr, "RGB:\n"); + print_list(s->ReadyRGB); + fprintf(stderr, "Alpha:\n"); + print_list(s->ReadyAlpha); + fprintf(stderr, "TEX:\n"); + print_list(s->ReadyTEX); #endif - for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { - if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { - emit_all_tex(s, before); - s->PrevBlockHasKil = 1; - return; - } - tex_count++; - } - update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); - update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); - update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); + for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { + if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { + emit_all_tex(s, before); + s->PrevBlockHasKil = 1; + return; + } + tex_count++; + } + update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); + update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); + update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); - if (tex_count >= s->max_tex_group || max_score == -1 - || (s->TEXCount > 0 && tex_count == s->TEXCount) - || (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) { - emit_all_tex(s, before); - } else { + if (tex_count >= s->max_tex_group || max_score == -1 || + (s->TEXCount > 0 && tex_count == s->TEXCount) || + (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) { + emit_all_tex(s, before); + } else { + remove_inst_from_list(max_list, max_inst); + rc_insert_instruction(before->Prev, max_inst->Instruction); + 
commit_alu_instruction(s, max_inst); - remove_inst_from_list(max_list, max_inst); - rc_insert_instruction(before->Prev, max_inst->Instruction); - commit_alu_instruction(s, max_inst); - - presub_nop(before->Prev); - } + presub_nop(before->Prev); + } } -static void add_tex_reader( - struct schedule_state * s, - struct schedule_instruction * writer, - struct schedule_instruction * reader) +static void +add_tex_reader(struct schedule_state *s, struct schedule_instruction *writer, + struct schedule_instruction *reader) { - if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { - /*Not a TEX instructions */ - return; - } - reader->TexReadCount++; - rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); + if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { + /*Not a TEX instructions */ + return; + } + reader->TexReadCount++; + rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); } -static void scan_read(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) +static void +scan_read(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index, + unsigned int chan) { - struct schedule_state * s = data; - struct reg_value ** v = get_reg_valuep(s, file, index, chan); - struct reg_value_reader * reader; + struct schedule_state *s = data; + struct reg_value **v = get_reg_valuep(s, file, index, chan); + struct reg_value_reader *reader; - if (!v) - return; + if (!v) + return; - if (*v && (*v)->Writer == s->Current) { - /* The instruction reads and writes to a register component. - * In this case, we only want to increment dependencies by one. - * Why? - * Because each instruction depends on the writers of its source - * registers _and_ the most recent writer of its destination - * register. 
In this case, the current instruction (s->Current) - * has a dependency that both writes to one of its source - * registers and was the most recent writer to its destination - * register. We have already marked this dependency in - * scan_write(), so we don't need to do it again. - */ + if (*v && (*v)->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + * Why? + * Because each instruction depends on the writers of its source + * registers _and_ the most recent writer of its destination + * register. In this case, the current instruction (s->Current) + * has a dependency that both writes to one of its source + * registers and was the most recent writer to its destination + * register. We have already marked this dependency in + * scan_write(), so we don't need to do it again. + */ - /* We need to make sure we are adding s->Current to the - * previous writer's list of TexReaders, if the previous writer - * was a TEX instruction. - */ - add_tex_reader(s, s->PrevWriter[chan], s->Current); + /* We need to make sure we are adding s->Current to the + * previous writer's list of TexReaders, if the previous writer + * was a TEX instruction. + */ + add_tex_reader(s, s->PrevWriter[chan], s->Current); - return; - } + return; + } - DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); - reader->Reader = s->Current; - if (!*v) { - /* In this situation, the instruction reads from a register - * that hasn't been written to or read from in the current - * block. 
*/ - *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); - memset(*v, 0, sizeof(struct reg_value)); - (*v)->Readers = reader; - } else { - reader->Next = (*v)->Readers; - (*v)->Readers = reader; - /* Only update the current instruction's dependencies if the - * register it reads from has been written to in this block. */ - if ((*v)->Writer) { - add_tex_reader(s, (*v)->Writer, s->Current); - s->Current->NumDependencies++; - } - } - (*v)->NumReaders++; + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. 
*/ + if ((*v)->Writer) { + add_tex_reader(s, (*v)->Writer, s->Current); + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; - if (s->Current->NumReadValues >= 12) { - rc_error(s->C, "%s: NumReadValues overflow\n", __func__); - } else { - s->Current->ReadValues[s->Current->NumReadValues++] = *v; - } + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __func__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = *v; + } } -static void scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) +static void +scan_write(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index, + unsigned int chan) { - struct schedule_state * s = data; - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - struct reg_value * newv; + struct schedule_state *s = data; + struct reg_value **pv = get_reg_valuep(s, file, index, chan); + struct reg_value *newv; - if (!pv) - return; + if (!pv) + return; - DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); - memset(newv, 0, sizeof(*newv)); + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); - newv->Writer = s->Current; + newv->Writer = s->Current; - if (*pv) { - (*pv)->Next = newv; - s->Current->NumDependencies++; - /* Keep track of the previous writer to s->Current's destination - * register */ - s->PrevWriter[chan] = (*pv)->Writer; - } + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + /* Keep track of the previous writer to s->Current's destination + * register */ + s->PrevWriter[chan] = (*pv)->Writer; + } - *pv = newv; + *pv = newv; - if (s->Current->NumWriteValues >= 4) { - rc_error(s->C, "%s: NumWriteValues overflow\n", __func__); - } else { - 
s->Current->WriteValues[s->Current->NumWriteValues++] = newv; - } + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __func__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } } -static void is_rgb_to_alpha_possible_normal( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) +static void +is_rgb_to_alpha_possible_normal(void *userdata, struct rc_instruction *inst, + struct rc_src_register *src) { - struct rc_reader_data * reader_data = userdata; - reader_data->Abort = 1; - + struct rc_reader_data *reader_data = userdata; + reader_data->Abort = 1; } -static void schedule_block(struct schedule_state * s, - struct rc_instruction * begin, struct rc_instruction * end) +static void +schedule_block(struct schedule_state *s, struct rc_instruction *begin, struct rc_instruction *end) { - unsigned int ip; + unsigned int ip; - /* Scan instructions for data dependencies */ - ip = 0; - for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); - memset(s->Current, 0, sizeof(struct schedule_instruction)); + /* Scan instructions for data dependencies */ + ip = 0; + for (struct rc_instruction *inst = begin; inst != end; inst = inst->Next) { + s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); + memset(s->Current, 0, sizeof(struct schedule_instruction)); - if (inst->Type == RC_INSTRUCTION_NORMAL) { - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - if (info->HasTexture) { - s->TEXCount++; - } - } + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + if (info->HasTexture) { + s->TEXCount++; + } + } - /* XXX: This causes SemWait to be set for all instructions in - * a block if the previous block contained a TEX instruction. - * We can do better here, but it will take a lot of work. 
*/ - if (s->PrevBlockHasTex) { - s->Current->TexReadCount = 1; - } + /* XXX: This causes SemWait to be set for all instructions in + * a block if the previous block contained a TEX instruction. + * We can do better here, but it will take a lot of work. */ + if (s->PrevBlockHasTex) { + s->Current->TexReadCount = 1; + } - s->Current->Instruction = inst; - inst->IP = ip++; + s->Current->Instruction = inst; + inst->IP = ip++; - DBG("%i: Scanning\n", inst->IP); + DBG("%i: Scanning\n", inst->IP); - /* The order of things here is subtle and maybe slightly - * counter-intuitive, to account for the case where an - * instruction writes to the same register as it reads - * from. */ - rc_for_all_writes_chan(inst, &scan_write, s); - rc_for_all_reads_chan(inst, &scan_read, s); + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, s); + rc_for_all_reads_chan(inst, &scan_read, s); - DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); + DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); - if (!s->Current->NumDependencies) { - instruction_ready(s, s->Current); - } + if (!s->Current->NumDependencies) { + instruction_ready(s, s->Current); + } - /* Get global readers for possible RGB->Alpha conversion. */ - s->Current->GlobalReaders.ExitOnAbort = 1; - rc_get_readers(s->C, inst, &s->Current->GlobalReaders, - is_rgb_to_alpha_possible_normal, - is_rgb_to_alpha_possible, NULL); - } + /* Get global readers for possible RGB->Alpha conversion. 
*/ + s->Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); + } - /* Temporarily unlink all instructions */ - begin->Prev->Next = end; - end->Prev = begin->Prev; + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; - /* Schedule instructions back */ - while(!s->C->Error && - (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { - emit_instruction(s, end); - } + /* Schedule instructions back */ + while (!s->C->Error && (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { + emit_instruction(s, end); + } } -static int is_controlflow(struct rc_instruction * inst) +static int +is_controlflow(struct rc_instruction *inst) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - return opcode->IsFlowControl; - } - return 0; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; } -void rc_pair_schedule(struct radeon_compiler *cc, void *user) +void +rc_pair_schedule(struct radeon_compiler *cc, void *user) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct schedule_state s; - struct rc_instruction * inst = c->Base.Program.Instructions.Next; - unsigned int * opt = user; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc; + struct schedule_state s; + struct rc_instruction *inst = c->Base.Program.Instructions.Next; + unsigned int *opt = user; - memset(&s, 0, sizeof(s)); - s.Opt = *opt; - s.C = &c->Base; - if (s.C->is_r500) { - s.CalcScore = calc_score_readers; - } else { - s.CalcScore = calc_score_r300; - } - s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); - while(inst != &c->Base.Program.Instructions) { - 
struct rc_instruction * first; + memset(&s, 0, sizeof(s)); + s.Opt = *opt; + s.C = &c->Base; + if (s.C->is_r500) { + s.CalcScore = calc_score_readers; + } else { + s.CalcScore = calc_score_r300; + } + s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); + while (inst != &c->Base.Program.Instructions) { + struct rc_instruction *first; - if (is_controlflow(inst)) { - /* The TexSemWait flag is already properly set for ALU - * instructions using the results of normal TEX lookup, - * however it was found empirically that TEXKIL also needs - * synchronization with the control flow. This might not be optimal, - * however the docs don't offer any guidance in this matter. - */ - if (s.PrevBlockHasKil) { - inst->U.I.TexSemWait = 1; - s.PrevBlockHasKil = 0; - } - inst = inst->Next; - continue; - } + if (is_controlflow(inst)) { + /* The TexSemWait flag is already properly set for ALU + * instructions using the results of normal TEX lookup, + * however it was found empirically that TEXKIL also needs + * synchronization with the control flow. This might not be optimal, + * however the docs don't offer any guidance in this matter. 
+ */ + if (s.PrevBlockHasKil) { + inst->U.I.TexSemWait = 1; + s.PrevBlockHasKil = 0; + } + inst = inst->Next; + continue; + } - first = inst; + first = inst; - while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) - inst = inst->Next; + while (inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; - DBG("Schedule one block\n"); - memset(s.Temporary, 0, sizeof(s.Temporary)); - s.TEXCount = 0; - schedule_block(&s, first, inst); - if (s.PendingTEX) { - s.PrevBlockHasTex = 1; - } - } + DBG("Schedule one block\n"); + memset(s.Temporary, 0, sizeof(s.Temporary)); + s.TEXCount = 0; + schedule_block(&s, first, inst); + if (s.PendingTEX) { + s.PrevBlockHasTex = 1; + } + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c index 0ae4d97690f..8f92365cc89 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -10,41 +10,41 @@ #include "util/compiler.h" - /** * Finally rewrite ADD, MOV, MUL as the appropriate native instruction * and reverse the order of arguments for CMP. 
*/ -static void final_rewrite(struct rc_sub_instruction *inst) +static void +final_rewrite(struct rc_sub_instruction *inst) { - struct rc_src_register tmp; + struct rc_src_register tmp; - switch(inst->Opcode) { - case RC_OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = RC_FILE_NONE; - inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; - inst->SrcReg[1].Negate = RC_MASK_NONE; - inst->Opcode = RC_OPCODE_MAD; - break; - case RC_OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case RC_OPCODE_MOV: - inst->SrcReg[1] = inst->SrcReg[0]; - inst->Opcode = RC_OPCODE_MAX; - break; - case RC_OPCODE_MUL: - inst->SrcReg[2].File = RC_FILE_NONE; - inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; - inst->Opcode = RC_OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } + switch (inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + inst->SrcReg[1] = inst->SrcReg[0]; + inst->Opcode = RC_OPCODE_MAX; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } } /** @@ -55,326 +55,309 @@ static void final_rewrite(struct rc_sub_instruction *inst) * The output modifier cannot be disabled for a saturated MOV (MOV with clamping enabled). * RC_OMOD_DISABLE is only available on R5xx and is only valid with MIN/MAX/CMP/CND. 
*/ -static unsigned translate_omod(struct r300_fragment_program_compiler *c, - struct rc_sub_instruction *inst) +static unsigned +translate_omod(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { - if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode && - (inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN || - inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND)) - return RC_OMOD_DISABLE; - return inst->Omod; + if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode && + (inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN || + inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND)) + return RC_OMOD_DISABLE; + return inst->Omod; } /** * Classify an instruction according to which ALUs etc. it needs */ -static void classify_instruction(struct rc_sub_instruction * inst, - int * needrgb, int * needalpha, int * istranscendent) +static void +classify_instruction(struct rc_sub_instruction *inst, int *needrgb, int *needalpha, + int *istranscendent) { - *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; - *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; - *istranscendent = 0; + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 
1 : 0; + *istranscendent = 0; - if (inst->WriteALUResult == RC_ALURESULT_X) - *needrgb = 1; - else if (inst->WriteALUResult == RC_ALURESULT_W) - *needalpha = 1; + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; - switch(inst->Opcode) { - case RC_OPCODE_ADD: - case RC_OPCODE_CMP: - case RC_OPCODE_CND: - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - case RC_OPCODE_FRC: - case RC_OPCODE_MAD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MOV: - case RC_OPCODE_MUL: - break; - case RC_OPCODE_COS: - case RC_OPCODE_EX2: - case RC_OPCODE_LG2: - case RC_OPCODE_RCP: - case RC_OPCODE_RSQ: - case RC_OPCODE_SIN: - *istranscendent = 1; - *needalpha = 1; - break; - case RC_OPCODE_DP4: - *needalpha = 1; - FALLTHROUGH; - case RC_OPCODE_DP3: - *needrgb = 1; - break; - default: - break; - } + switch (inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_CND: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: *needalpha = 1; FALLTHROUGH; + case RC_OPCODE_DP3: *needrgb = 1; break; + default: break; + } } -static void src_uses(struct rc_src_register src, unsigned int * rgb, - unsigned int * alpha) +static void +src_uses(struct rc_src_register src, unsigned int *rgb, unsigned int *alpha) { - int j; - for(j = 0; j < 4; ++j) { - unsigned int swz = GET_SWZ(src.Swizzle, j); - if (swz < 3) - *rgb = 1; - else if (swz < 4) - *alpha = 1; - } + int j; + for (j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(src.Swizzle, j); + if (swz < 3) + *rgb = 1; + else if (swz < 4) + *alpha = 1; + } } /** * Fill the given ALU instruction's opcodes and source 
operands into the given pair, * if possible. */ -static void set_pair_instruction(struct r300_fragment_program_compiler *c, - struct rc_pair_instruction * pair, - struct rc_sub_instruction * inst) +static void +set_pair_instruction(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *pair, + struct rc_sub_instruction *inst) { - int needrgb, needalpha, istranscendent; - const struct rc_opcode_info * opcode; - int i; + int needrgb, needalpha, istranscendent; + const struct rc_opcode_info *opcode; + int i; - memset(pair, 0, sizeof(struct rc_pair_instruction)); + memset(pair, 0, sizeof(struct rc_pair_instruction)); - classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); - if (needrgb) { - if (istranscendent) - pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (needalpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } - opcode = rc_get_opcode_info(inst->Opcode); + opcode = rc_get_opcode_info(inst->Opcode); - /* Presubtract handling: - * We need to make sure that the values used by the presubtract - * operation end up in src0 or src1. 
*/ - if(inst->PreSub.Opcode != RC_PRESUB_NONE) { - /* rc_pair_alloc_source() will fill in data for - * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ - int j; - for(j = 0; j < 3; j++) { - int src_regs; - if(inst->SrcReg[j].File != RC_FILE_PRESUB) - continue; + /* Presubtract handling: + * We need to make sure that the values used by the presubtract + * operation end up in src0 or src1. */ + if (inst->PreSub.Opcode != RC_PRESUB_NONE) { + /* rc_pair_alloc_source() will fill in data for + * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ + int j; + for (j = 0; j < 3; j++) { + int src_regs; + if (inst->SrcReg[j].File != RC_FILE_PRESUB) + continue; - src_regs = rc_presubtract_src_reg_count( - inst->PreSub.Opcode); - for(i = 0; i < src_regs; i++) { - unsigned int rgb = 0; - unsigned int alpha = 0; - src_uses(inst->SrcReg[j], &rgb, &alpha); - if(rgb) { - pair->RGB.Src[i].File = - inst->PreSub.SrcReg[i].File; - pair->RGB.Src[i].Index = - inst->PreSub.SrcReg[i].Index; - pair->RGB.Src[i].Used = 1; - } - if(alpha) { - pair->Alpha.Src[i].File = - inst->PreSub.SrcReg[i].File; - pair->Alpha.Src[i].Index = - inst->PreSub.SrcReg[i].Index; - pair->Alpha.Src[i].Used = 1; - } - } - } - } - - for(i = 0; i < opcode->NumSrcRegs; ++i) { - int source; - if (needrgb && !istranscendent) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int srcmask = 0; - int j; - /* We don't care about the alpha channel here. We only - * want the part of the swizzle that writes to rgb, - * since we are creating an rgb instruction. */ - for(j = 0; j < 3; ++j) { - unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - - if (swz < RC_SWIZZLE_W) - srcrgb = 1; - else if (swz == RC_SWIZZLE_W) - srcalpha = 1; - - /* We check for ZERO here as well because otherwise the zero - * sign (which doesn't matter and we already ignore it previously - * when checking for valid swizzle) could mess up the final negate sign. 
- * Example problematic pattern where this would be produced is: - * CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000} - * ADD temp[0].xyz, const[0].xyz_, -const[1].z00_; - * - * after inline literals would become: - * ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_; - * - * and after pair translate: - * src0.xyz = const[0], src0.w = 4.000000 (0x48) - * MAD temp[0].xyz, src0.xyz, src0.111, src0.w00 - * - * Without the zero check there would be -src0.w00. - */ - if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) - srcmask |= 1 << j; - } - source = rc_pair_alloc_source(pair, srcrgb, srcalpha, - inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (source < 0) { - rc_error(&c->Base, "Failed to translate " - "rgb instruction.\n"); - return; - } - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = - rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); - } - if (needalpha) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int swz; - if (istranscendent) { - swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle); - } else { - swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3); - } - - if (swz < 3) - srcrgb = 1; - else if (swz < 4) - srcalpha = 1; - source = rc_pair_alloc_source(pair, srcrgb, srcalpha, - inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (source < 0) { - rc_error(&c->Base, "Failed to translate " - "alpha instruction.\n"); - return; - } - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - - if (istranscendent) { - pair->Alpha.Arg[i].Negate = - !!(inst->SrcReg[i].Negate & - inst->DstReg.WriteMask); - } else { - pair->Alpha.Arg[i].Negate = - !!(inst->SrcReg[i].Negate & RC_MASK_W); - } - } - } - - /* Destination handling */ - if (inst->DstReg.File == RC_FILE_OUTPUT) { - if (inst->DstReg.Index == 
c->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else { - for (i = 0; i < 4; i++) { - if (inst->DstReg.Index == c->OutputColor[i]) { - pair->RGB.Target = i; - pair->Alpha.Target = i; - pair->RGB.OutputWriteMask |= - inst->DstReg.WriteMask & RC_MASK_XYZ; - pair->Alpha.OutputWriteMask |= - GET_BIT(inst->DstReg.WriteMask, 3); - break; - } + src_regs = rc_presubtract_src_reg_count(inst->PreSub.Opcode); + for (i = 0; i < src_regs; i++) { + unsigned int rgb = 0; + unsigned int alpha = 0; + src_uses(inst->SrcReg[j], &rgb, &alpha); + if (rgb) { + pair->RGB.Src[i].File = inst->PreSub.SrcReg[i].File; + pair->RGB.Src[i].Index = inst->PreSub.SrcReg[i].Index; + pair->RGB.Src[i].Used = 1; } - } - } else { - if (needrgb) { - pair->RGB.DestIndex = inst->DstReg.Index; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; - } + if (alpha) { + pair->Alpha.Src[i].File = inst->PreSub.SrcReg[i].File; + pair->Alpha.Src[i].Index = inst->PreSub.SrcReg[i].Index; + pair->Alpha.Src[i].Used = 1; + } + } + } + } - if (needalpha) { - pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); - if (pair->Alpha.WriteMask) { - pair->Alpha.DestIndex = inst->DstReg.Index; - } - } - } + for (i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int srcmask = 0; + int j; + /* We don't care about the alpha channel here. We only + * want the part of the swizzle that writes to rgb, + * since we are creating an rgb instruction. 
*/ + for (j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (needrgb) { - pair->RGB.Omod = translate_omod(c, inst); - } - if (needalpha) { - pair->Alpha.Omod = translate_omod(c, inst); - } + if (swz < RC_SWIZZLE_W) + srcrgb = 1; + else if (swz == RC_SWIZZLE_W) + srcalpha = 1; - if (inst->WriteALUResult) { - pair->WriteALUResult = inst->WriteALUResult; - pair->ALUResultCompare = inst->ALUResultCompare; - } + /* We check for ZERO here as well because otherwise the zero + * sign (which doesn't matter and we already ignore it previously + * when checking for valid swizzle) could mess up the final negate sign. + * Example problematic pattern where this would be produced is: + * CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000} + * ADD temp[0].xyz, const[0].xyz_, -const[1].z00_; + * + * after inline literals would become: + * ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_; + * + * and after pair translate: + * src0.xyz = const[0], src0.w = 4.000000 (0x48) + * MAD temp[0].xyz, src0.xyz, src0.111, src0.w00 + * + * Without the zero check there would be -src0.w00. 
+ */ + if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + srcmask |= 1 << j; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, + inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "rgb instruction.\n"); + return; + } + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = + !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz; + if (istranscendent) { + swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle); + } else { + swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3); + } + + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, + inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "alpha instruction.\n"); + return; + } + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + + if (istranscendent) { + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & inst->DstReg.WriteMask); + } else { + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= 
inst->DstReg.WriteMask & RC_MASK_XYZ; + } + + if (needalpha) { + pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } + } + } + + if (needrgb) { + pair->RGB.Omod = translate_omod(c, inst); + } + if (needalpha) { + pair->Alpha.Omod = translate_omod(c, inst); + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } } - -static void check_opcode_support(struct r300_fragment_program_compiler *c, - struct rc_sub_instruction *inst) +static void +check_opcode_support(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg) { - if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { - rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); - return; - } - } + if (opcode->HasDstReg) { + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->SrcReg[i].RelAddr) { - rc_error(&c->Base, "Fragment program does not support relative addressing " - " of source operands.\n"); - return; - } - } + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } } - /** * Translate all ALU instructions into corresponding pair instructions, * performing no other changes. 
*/ -void rc_pair_translate(struct radeon_compiler *cc, void *user) +void +rc_pair_translate(struct radeon_compiler *cc, void *user) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc; - for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; - inst != &c->Base.Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * opcode; - struct rc_sub_instruction copy; + for (struct rc_instruction *inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode; + struct rc_sub_instruction copy; - if (inst->Type != RC_INSTRUCTION_NORMAL) - continue; + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; - opcode = rc_get_opcode_info(inst->U.I.Opcode); + opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) - continue; + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; - copy = inst->U.I; + copy = inst->U.I; - check_opcode_support(c, &copy); + check_opcode_support(c, &copy); - final_rewrite(&copy); - inst->Type = RC_INSTRUCTION_PAIR; - set_pair_instruction(c, &inst->U.P, &copy); - } + final_rewrite(&copy); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, &copy); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c index 2508a3d517b..0359d8574bc 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program.c +++ b/src/gallium/drivers/r300/compiler/radeon_program.c @@ -10,7 +10,6 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" - /** * Transform the given clause in the following way: * 1. Replace it with an empty clause @@ -26,108 +25,108 @@ * \note The transform is called 'local' because it can only look at * one instruction at a time. 
*/ -void rc_local_transform( - struct radeon_compiler * c, - void *user) +void +rc_local_transform(struct radeon_compiler *c, void *user) { - struct radeon_program_transformation *transformations = - (struct radeon_program_transformation*)user; - struct rc_instruction * inst = c->Program.Instructions.Next; + struct radeon_program_transformation *transformations = + (struct radeon_program_transformation *)user; + struct rc_instruction *inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * current = inst; - int i; + while (inst != &c->Program.Instructions) { + struct rc_instruction *current = inst; + int i; - inst = inst->Next; + inst = inst->Next; - for(i = 0; transformations[i].function; ++i) { - struct radeon_program_transformation* t = transformations + i; + for (i = 0; transformations[i].function; ++i) { + struct radeon_program_transformation *t = transformations + i; - if (t->function(c, current, t->userData)) - break; - } - } + if (t->function(c, current, t->userData)) + break; + } + } } -unsigned int rc_find_free_temporary(struct radeon_compiler * c) +unsigned int +rc_find_free_temporary(struct radeon_compiler *c) { - /* Find the largest used temp index when called for the first time. */ - if (c->max_temp_index == -1) { - for (struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = - rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->HasDstReg && - inst->U.I.DstReg.File == RC_FILE_TEMPORARY && - inst->U.I.WriteALUResult == RC_ALURESULT_NONE && - inst->U.I.DstReg.Index > c->max_temp_index) - c->max_temp_index = inst->U.I.DstReg.Index; - } - } + /* Find the largest used temp index when called for the first time. 
*/ + if (c->max_temp_index == -1) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + if (opcode->HasDstReg && inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.WriteALUResult == RC_ALURESULT_NONE && + inst->U.I.DstReg.Index > c->max_temp_index) + c->max_temp_index = inst->U.I.DstReg.Index; + } + } - c->max_temp_index++; - if (c->max_temp_index > RC_REGISTER_MAX_INDEX) { - rc_error(c, "Ran out of temporary registers\n"); - return 0; - } - return c->max_temp_index; + c->max_temp_index++; + if (c->max_temp_index > RC_REGISTER_MAX_INDEX) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return c->max_temp_index; } - -struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +struct rc_instruction * +rc_alloc_instruction(struct radeon_compiler *c) { - struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + struct rc_instruction *inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); - memset(inst, 0, sizeof(struct rc_instruction)); + memset(inst, 0, sizeof(struct rc_instruction)); - inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; - return inst; + return inst; } -void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) +void +rc_insert_instruction(struct rc_instruction *after, struct rc_instruction *inst) { - inst->Prev = after; - inst->Next = after->Next; + inst->Prev = 
after; + inst->Next = after->Next; - inst->Prev->Next = inst; - inst->Next->Prev = inst; + inst->Prev->Next = inst; + inst->Next->Prev = inst; } -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +struct rc_instruction * +rc_insert_new_instruction(struct radeon_compiler *c, struct rc_instruction *after) { - struct rc_instruction * inst = rc_alloc_instruction(c); + struct rc_instruction *inst = rc_alloc_instruction(c); - rc_insert_instruction(after, inst); + rc_insert_instruction(after, inst); - return inst; + return inst; } -void rc_remove_instruction(struct rc_instruction * inst) +void +rc_remove_instruction(struct rc_instruction *inst) { - inst->Prev->Next = inst->Next; - inst->Next->Prev = inst->Prev; + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; } /** * Return the number of instructions in the program. */ -unsigned int rc_recompute_ips(struct radeon_compiler * c) +unsigned int +rc_recompute_ips(struct radeon_compiler *c) { - unsigned int ip = 0; - struct rc_instruction * inst; + unsigned int ip = 0; + struct rc_instruction *inst; - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - inst->IP = ip++; - } + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + inst->IP = ip++; + } - c->Program.Instructions.IP = 0xcafedead; + c->Program.Instructions.IP = 0xcafedead; - return ip; + return ip; } diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h index 7c2d6e817fd..d43eea886bb 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program.h +++ b/src/gallium/drivers/r300/compiler/radeon_program.h @@ -9,39 +9,39 @@ #include #include -#include "radeon_opcodes.h" #include "radeon_code.h" +#include "radeon_opcodes.h" #include "radeon_program_constants.h" #include "radeon_program_pair.h" struct radeon_compiler; struct rc_src_register { - 
unsigned int File:4; + unsigned int File : 4; - /** Negative values may be used for relative addressing. */ - unsigned int Index:RC_REGISTER_INDEX_BITS; - unsigned int RelAddr:1; + /** Negative values may be used for relative addressing. */ + unsigned int Index : RC_REGISTER_INDEX_BITS; + unsigned int RelAddr : 1; - unsigned int Swizzle:12; + unsigned int Swizzle : 12; - /** Take the component-wise absolute value */ - unsigned int Abs:1; + /** Take the component-wise absolute value */ + unsigned int Abs : 1; - /** Post-Abs negation. */ - unsigned int Negate:4; + /** Post-Abs negation. */ + unsigned int Negate : 4; }; struct rc_dst_register { - unsigned int File:3; - unsigned int Index:RC_REGISTER_INDEX_BITS; - unsigned int WriteMask:4; - unsigned int Pred:2; + unsigned int File : 3; + unsigned int Index : RC_REGISTER_INDEX_BITS; + unsigned int WriteMask : 4; + unsigned int Pred : 2; }; struct rc_presub_instruction { - rc_presubtract_op Opcode; - struct rc_src_register SrcReg[2]; + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; }; /** @@ -53,94 +53,91 @@ struct rc_presub_instruction { * instruction types may be valid. */ struct rc_sub_instruction { - struct rc_src_register SrcReg[3]; - struct rc_dst_register DstReg; + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; - /** - * Opcode of this instruction, according to \ref rc_opcode enums. - */ - unsigned int Opcode:8; + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode : 8; - /** - * Saturate each value of the result to the range [0,1] or [-1,1], - * according to \ref rc_saturate_mode enums. - */ - unsigned int SaturateMode:2; + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. 
+ */ + unsigned int SaturateMode : 2; - /** - * Writing to the special register RC_SPECIAL_ALU_RESULT - */ - /*@{*/ - unsigned int WriteALUResult:2; - unsigned int ALUResultCompare:3; - /*@}*/ + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult : 2; + unsigned int ALUResultCompare : 3; + /*@}*/ - /** - * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. - */ - /*@{*/ - /** Source texture unit. */ - unsigned int TexSrcUnit:5; + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit : 5; - /** Source texture target, one of the \ref rc_texture_target enums */ - unsigned int TexSrcTarget:3; + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget : 3; - /** True if tex instruction should do shadow comparison */ - unsigned int TexShadow:1; + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow : 1; - /**/ - unsigned int TexSemWait:1; - unsigned int TexSemAcquire:1; + /**/ + unsigned int TexSemWait : 1; + unsigned int TexSemAcquire : 1; - /**R500 Only. How to swizzle the result of a TEX lookup*/ - unsigned int TexSwizzle:12; - /*@}*/ + /**R500 Only. How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle : 12; + /*@}*/ - /** This holds information about the presubtract operation used by - * this instruction. */ - struct rc_presub_instruction PreSub; + /** This holds information about the presubtract operation used by + * this instruction. 
*/ + struct rc_presub_instruction PreSub; - rc_omod_op Omod; + rc_omod_op Omod; }; -typedef enum { - RC_INSTRUCTION_NORMAL = 0, - RC_INSTRUCTION_PAIR -} rc_instruction_type; +typedef enum { RC_INSTRUCTION_NORMAL = 0, RC_INSTRUCTION_PAIR } rc_instruction_type; struct rc_instruction { - struct rc_instruction * Prev; - struct rc_instruction * Next; + struct rc_instruction *Prev; + struct rc_instruction *Next; - rc_instruction_type Type; - union { - struct rc_sub_instruction I; - struct rc_pair_instruction P; - } U; + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; - /** - * Warning: IPs are not stable. If you want to use them, - * you need to recompute them at the beginning of each pass - * using \ref rc_recompute_ips - */ - unsigned int IP; + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; }; struct rc_program { - /** - * Instructions.Next points to the first instruction, - * Instructions.Prev points to the last instruction. - */ - struct rc_instruction Instructions; + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; - /* Long term, we should probably remove InputsRead & OutputsWritten, - * since updating dependent state can be fragile, and they aren't - * actually used very often. */ - uint32_t InputsRead; - uint32_t OutputsWritten; - uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. 
*/ - struct rc_constant_list Constants; + struct rc_constant_list Constants; }; /** @@ -154,36 +151,27 @@ struct rc_program { * The function gets passed the userData as last parameter. */ struct radeon_program_transformation { - int (*function)( - struct radeon_compiler*, - struct rc_instruction*, - void*); - void *userData; + int (*function)(struct radeon_compiler *, struct rc_instruction *, void *); + void *userData; }; -void rc_local_transform( - struct radeon_compiler *c, - void *user); +void rc_local_transform(struct radeon_compiler *c, void *user); -void rc_get_used_temporaries( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length); +void rc_get_used_temporaries(struct radeon_compiler *c, unsigned char *used, + unsigned int used_length); -int rc_find_free_temporary_list( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length, - unsigned int mask); +int rc_find_free_temporary_list(struct radeon_compiler *c, unsigned char *used, + unsigned int used_length, unsigned int mask); -unsigned int rc_find_free_temporary(struct radeon_compiler * c); +unsigned int rc_find_free_temporary(struct radeon_compiler *c); -struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); -void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); -void rc_remove_instruction(struct rc_instruction * inst); +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler *c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler *c, + struct rc_instruction *after); +void rc_insert_instruction(struct rc_instruction *after, struct rc_instruction *inst); +void rc_remove_instruction(struct rc_instruction *inst); -unsigned int rc_recompute_ips(struct radeon_compiler * c); +unsigned int rc_recompute_ips(struct radeon_compiler *c); void rc_print_program(const struct 
rc_program *prog); diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index c6fcdbff4cf..9cf605a0b6f 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -19,183 +19,183 @@ #include "util/log.h" -static struct rc_instruction *emit1( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, struct rc_sub_instruction * base, - struct rc_dst_register DstReg, struct rc_src_register SrcReg) +static struct rc_instruction * +emit1(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode, + struct rc_sub_instruction *base, struct rc_dst_register DstReg, struct rc_src_register SrcReg) { - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - if (base) { - memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); - } + if (base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } - fpi->U.I.Opcode = Opcode; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg; - return fpi; + fpi->U.I.Opcode = Opcode; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; + return fpi; } -static struct rc_instruction *emit2( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, struct rc_sub_instruction * base, - struct rc_dst_register DstReg, - struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) +static struct rc_instruction * +emit2(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode, + struct rc_sub_instruction *base, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) { - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - if (base) { - memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); - } + if 
(base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } - fpi->U.I.Opcode = Opcode; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg0; - fpi->U.I.SrcReg[1] = SrcReg1; - return fpi; + fpi->U.I.Opcode = Opcode; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + return fpi; } -static struct rc_instruction *emit3( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, struct rc_sub_instruction * base, - struct rc_dst_register DstReg, - struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, - struct rc_src_register SrcReg2) +static struct rc_instruction * +emit3(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode, + struct rc_sub_instruction *base, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) { - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - if (base) { - memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); - } + if (base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } - fpi->U.I.Opcode = Opcode; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg0; - fpi->U.I.SrcReg[1] = SrcReg1; - fpi->U.I.SrcReg[2] = SrcReg2; - return fpi; + fpi->U.I.Opcode = Opcode; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; + return fpi; } -static struct rc_dst_register dstregtmpmask(int index, int mask) +static struct rc_dst_register +dstregtmpmask(int index, int mask) { - struct rc_dst_register dst = {0, 0, 0}; - dst.File = RC_FILE_TEMPORARY; - dst.Index = index; - dst.WriteMask = mask; - return dst; + struct rc_dst_register dst = {0, 0, 0}; + dst.File = RC_FILE_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + return dst; } static const struct rc_src_register builtin_one = { - 
.File = RC_FILE_NONE, - .Index = 0, - .Swizzle = RC_SWIZZLE_1111 -}; + .File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_1111}; static const struct rc_src_register srcreg_undefined = { - .File = RC_FILE_NONE, - .Index = 0, - .Swizzle = RC_SWIZZLE_XYZW -}; + .File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_XYZW}; -static struct rc_src_register srcreg(int file, int index) +static struct rc_src_register +srcreg(int file, int index) { - struct rc_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - return src; + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; } -static struct rc_src_register srcregswz(int file, int index, int swz) +static struct rc_src_register +srcregswz(int file, int index, int swz) { - struct rc_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - src.Swizzle = swz; - return src; + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; } -static struct rc_src_register absolute(struct rc_src_register reg) +static struct rc_src_register +absolute(struct rc_src_register reg) { - struct rc_src_register newreg = reg; - newreg.Abs = 1; - newreg.Negate = RC_MASK_NONE; - return newreg; + struct rc_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = RC_MASK_NONE; + return newreg; } -static struct rc_src_register negate(struct rc_src_register reg) +static struct rc_src_register +negate(struct rc_src_register reg) { - struct rc_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; - return newreg; + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; + return newreg; } -static struct rc_src_register swizzle(struct rc_src_register reg, - rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) +static struct rc_src_register +swizzle(struct rc_src_register reg, rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) { 
- struct rc_src_register swizzled = reg; - swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); - return swizzled; + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); + return swizzled; } -static struct rc_src_register swizzle_smear(struct rc_src_register reg, - rc_swizzle x) +static struct rc_src_register +swizzle_smear(struct rc_src_register reg, rc_swizzle x) { - return swizzle(reg, x, x, x, x); + return swizzle(reg, x, x, x, x); } -static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) +static struct rc_src_register +swizzle_xxxx(struct rc_src_register reg) { - return swizzle_smear(reg, RC_SWIZZLE_X); + return swizzle_smear(reg, RC_SWIZZLE_X); } -static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) +static struct rc_src_register +swizzle_yyyy(struct rc_src_register reg) { - return swizzle_smear(reg, RC_SWIZZLE_Y); + return swizzle_smear(reg, RC_SWIZZLE_Y); } -static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) +static struct rc_src_register +swizzle_zzzz(struct rc_src_register reg) { - return swizzle_smear(reg, RC_SWIZZLE_Z); + return swizzle_smear(reg, RC_SWIZZLE_Z); } -static struct rc_src_register swizzle_wwww(struct rc_src_register reg) +static struct rc_src_register +swizzle_wwww(struct rc_src_register reg) { - return swizzle_smear(reg, RC_SWIZZLE_W); + return swizzle_smear(reg, RC_SWIZZLE_W); } -static struct rc_dst_register new_dst_reg(struct radeon_compiler *c, - struct rc_instruction *inst) +static struct rc_dst_register +new_dst_reg(struct radeon_compiler *c, struct rc_instruction *inst) { - unsigned tmp = rc_find_free_temporary(c); - return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); + unsigned tmp = rc_find_free_temporary(c); + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); } -static void transform_DP2(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_DP2(struct radeon_compiler *c, struct 
rc_instruction *inst) { - struct rc_src_register src0 = inst->U.I.SrcReg[0]; - struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); - src0.Swizzle &= ~(63 << (3 * 2)); - src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); - src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); - src1.Swizzle &= ~(63 << (3 * 2)); - src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); - emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1); - rc_remove_instruction(inst); + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); } -static void transform_RSQ(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_RSQ(struct radeon_compiler *c, struct rc_instruction *inst) { - inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); } -static void transform_KILP(struct radeon_compiler * c, - struct rc_instruction * inst) +static void +transform_KILP(struct radeon_compiler *c, struct rc_instruction *inst) { - inst->U.I.SrcReg[0] = negate(builtin_one); - inst->U.I.Opcode = RC_OPCODE_KIL; + inst->U.I.SrcReg[0] = negate(builtin_one); + inst->U.I.Opcode = RC_OPCODE_KIL; } /** @@ -207,165 +207,150 @@ static void transform_KILP(struct radeon_compiler * c, * * @note should be applicable to R300 and R500 fragment programs. 
*/ -int radeonTransformALU( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* unused) +int +radeonTransformALU(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { - switch(inst->U.I.Opcode) { - case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; - case RC_OPCODE_KILP: transform_KILP(c, inst); return 1; - case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; - case RC_OPCODE_SEQ: unreachable(); - case RC_OPCODE_SGE: unreachable(); - case RC_OPCODE_SLT: unreachable(); - case RC_OPCODE_SNE: unreachable(); - default: - return 0; - } + switch (inst->U.I.Opcode) { + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; + case RC_OPCODE_KILP: transform_KILP(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: unreachable(); + case RC_OPCODE_SGE: unreachable(); + case RC_OPCODE_SLT: unreachable(); + case RC_OPCODE_SNE: unreachable(); + default: return 0; + } } -static void transform_r300_vertex_CMP(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_r300_vertex_CMP(struct radeon_compiler *c, struct rc_instruction *inst) { - /* R5xx has a CMP, but we can use it only if it reads from less than - * three different temps. */ - if (c->is_r500 && !rc_inst_has_three_diff_temp_srcs(inst)) - return; + /* R5xx has a CMP, but we can use it only if it reads from less than + * three different temps. 
*/ + if (c->is_r500 && !rc_inst_has_three_diff_temp_srcs(inst)) + return; - unreachable(); + unreachable(); } -static void transform_r300_vertex_DP2(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_r300_vertex_DP2(struct radeon_compiler *c, struct rc_instruction *inst) { - struct rc_instruction *next_inst = inst->Next; - transform_DP2(c, inst); - next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; } -static void transform_r300_vertex_DP3(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_r300_vertex_DP3(struct radeon_compiler *c, struct rc_instruction *inst) { - struct rc_src_register src0 = inst->U.I.SrcReg[0]; - struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~RC_MASK_W; - src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1); - rc_remove_instruction(inst); + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); } -static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, - struct rc_instruction* inst) +static void +transform_r300_vertex_fix_LIT(struct radeon_compiler *c, struct rc_instruction *inst) { - struct rc_dst_register dst = new_dst_reg(c, inst); - unsigned constant_swizzle; - int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, - 
0.0000000000000000001, - &constant_swizzle); + struct rc_dst_register dst = new_dst_reg(c, inst); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, + &constant_swizzle); - /* MOV dst, src */ - dst.WriteMask = RC_MASK_XYZW; - emit1(c, inst->Prev, RC_OPCODE_MOV, NULL, - dst, - inst->U.I.SrcReg[0]); + /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; + emit1(c, inst->Prev, RC_OPCODE_MOV, NULL, dst, inst->U.I.SrcReg[0]); - /* MAX dst.y, src, 0.00...001 */ - emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, - dstregtmpmask(dst.Index, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, dst.Index), - srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + /* MAX dst.y, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } -static void transform_r300_vertex_SEQ(struct radeon_compiler *c, - struct rc_instruction *inst) +static void +transform_r300_vertex_SEQ(struct radeon_compiler *c, struct rc_instruction *inst) { - /* x = y <==> x >= y && y >= x */ - /* x <= y */ - struct rc_dst_register dst0 = new_dst_reg(c, inst); - emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, - dst0, - inst->U.I.SrcReg[0], - inst->U.I.SrcReg[1]); + /* x = y <==> x >= y && y >= x */ + /* x <= y */ + struct rc_dst_register dst0 = new_dst_reg(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]); - /* y <= x */ - int tmp = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, - dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[1], - inst->U.I.SrcReg[0]); + /* y <= x */ + int tmp = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + 
inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]); - /* x && y = x * y */ - emit2(c, inst->Prev, RC_OPCODE_MUL, NULL, - inst->U.I.DstReg, - srcreg(dst0.File, dst0.Index), - srcreg(RC_FILE_TEMPORARY, tmp)); + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index), + srcreg(RC_FILE_TEMPORARY, tmp)); - rc_remove_instruction(inst); + rc_remove_instruction(inst); } -static void transform_r300_vertex_SNE(struct radeon_compiler *c, - struct rc_instruction *inst) +static void +transform_r300_vertex_SNE(struct radeon_compiler *c, struct rc_instruction *inst) { - /* x != y <==> x < y || y < x */ - /* x < y */ - struct rc_dst_register dst0 = new_dst_reg(c, inst); - emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, - dst0, - inst->U.I.SrcReg[0], - inst->U.I.SrcReg[1]); + /* x != y <==> x < y || y < x */ + /* x < y */ + struct rc_dst_register dst0 = new_dst_reg(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]); - /* y < x */ - int tmp = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, - dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[1], - inst->U.I.SrcReg[0]); + /* y < x */ + int tmp = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]); - /* x || y = max(x, y) */ - emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, - inst->U.I.DstReg, - srcreg(dst0.File, dst0.Index), - srcreg(RC_FILE_TEMPORARY, tmp)); + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index), + srcreg(RC_FILE_TEMPORARY, tmp)); - rc_remove_instruction(inst); + rc_remove_instruction(inst); } /** * For use with rc_local_transform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. 
*/ -int r300_transform_vertex_alu( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* unused) +int +r300_transform_vertex_alu(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { - switch(inst->U.I.Opcode) { - case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; - case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; - case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; - case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; - case RC_OPCODE_SEQ: - if (!c->is_r500) { - transform_r300_vertex_SEQ(c, inst); - return 1; - } - return 0; - case RC_OPCODE_SNE: - if (!c->is_r500) { - transform_r300_vertex_SNE(c, inst); - return 1; - } - return 0; - default: - return 0; - } + switch (inst->U.I.Opcode) { + case RC_OPCODE_CMP: + transform_r300_vertex_CMP(c, inst); + return 1; + case RC_OPCODE_DP2: + transform_r300_vertex_DP2(c, inst); + return 1; + case RC_OPCODE_DP3: + transform_r300_vertex_DP3(c, inst); + return 1; + case RC_OPCODE_LIT: + transform_r300_vertex_fix_LIT(c, inst); + return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + default: + return 0; + } } /** @@ -374,21 +359,20 @@ int r300_transform_vertex_alu( * @warning This explicitly changes the form of DDX and DDY! 
*/ -int radeonStubDeriv(struct radeon_compiler* c, - struct rc_instruction* inst, - void* unused) +int +radeonStubDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { - if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) - return 0; + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; - mesa_logw_once("r300: WARNING: Shader is trying to use derivatives, " - "but the hardware doesn't support it. " - "Expect possible misrendering (it's not a bug, do not report it)."); + mesa_logw_once("r300: WARNING: Shader is trying to use derivatives, " + "but the hardware doesn't support it. " + "Expect possible misrendering (it's not a bug, do not report it)."); - return 1; + return 1; } /** @@ -399,43 +383,42 @@ int radeonStubDeriv(struct radeon_compiler* c, * @warning This explicitly changes the form of DDX and DDY! 
*/ -int radeonTransformDeriv(struct radeon_compiler* c, - struct rc_instruction* inst, - void* unused) +int +radeonTransformDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { - if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) - return 0; + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; - inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; - inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - return 1; + return 1; } -int rc_force_output_alpha_to_one(struct radeon_compiler *c, - struct rc_instruction *inst, void *data) +int +rc_force_output_alpha_to_one(struct radeon_compiler *c, struct rc_instruction *inst, void *data) { - struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c; - const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); - unsigned tmp; + struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler *)c; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned tmp; - if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT || - inst->U.I.DstReg.Index == fragc->OutputDepth) - return 1; + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT || + inst->U.I.DstReg.Index == fragc->OutputDepth) + return 1; - tmp = rc_find_free_temporary(c); + tmp = rc_find_free_temporary(c); - /* Insert MOV after inst, set alpha to 1. */ - emit1(c, inst, RC_OPCODE_MOV, NULL, inst->U.I.DstReg, - srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1)); + /* Insert MOV after inst, set alpha to 1. */ + emit1(c, inst, RC_OPCODE_MOV, NULL, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1)); - /* Re-route the destination of inst to the source of mov. 
*/ - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = tmp; + /* Re-route the destination of inst to the source of mov. */ + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp; - /* Move the saturate output modifier to the MOV instruction - * (for better copy propagation). */ - inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode; - inst->U.I.SaturateMode = RC_SATURATE_NONE; - return 1; + /* Move the saturate output modifier to the MOV instruction + * (for better copy propagation). */ + inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode; + inst->U.I.SaturateMode = RC_SATURATE_NONE; + return 1; } diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h index 6d111600db8..c01e4742c29 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h @@ -8,27 +8,15 @@ #include "radeon_program.h" -int radeonTransformALU( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); +int radeonTransformALU(struct radeon_compiler *c, struct rc_instruction *inst, void *); -int r300_transform_vertex_alu( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); +int r300_transform_vertex_alu(struct radeon_compiler *c, struct rc_instruction *inst, void *); -int radeonStubDeriv( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); +int radeonStubDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *); -int radeonTransformDeriv( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); +int radeonTransformDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *); -int rc_force_output_alpha_to_one(struct radeon_compiler *c, - struct rc_instruction *inst, void *data); +int rc_force_output_alpha_to_one(struct radeon_compiler *c, struct rc_instruction *inst, + void *data); #endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git 
a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h index b1fb056c736..9c6f4f16824 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_constants.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -7,105 +7,106 @@ #define RADEON_PROGRAM_CONSTANTS_H typedef enum { - RC_SATURATE_NONE = 0, - RC_SATURATE_ZERO_ONE, - RC_SATURATE_MINUS_PLUS_ONE + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE } rc_saturate_mode; typedef enum { - RC_TEXTURE_2D_ARRAY, - RC_TEXTURE_1D_ARRAY, - RC_TEXTURE_CUBE, - RC_TEXTURE_3D, - RC_TEXTURE_RECT, - RC_TEXTURE_2D, - RC_TEXTURE_1D + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D } rc_texture_target; typedef enum { - /** - * Used to indicate unused register descriptions and - * source register that use a constant swizzle. - */ - RC_FILE_NONE = 0, - RC_FILE_TEMPORARY, + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, - /** - * Input register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_INPUT, + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, - /** - * Output register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_OUTPUT, - RC_FILE_ADDRESS, + /** + * Output register. 
+ * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, - /** - * Indicates a constant from the \ref rc_constant_list . - */ - RC_FILE_CONSTANT, + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, - /** - * Indicates a special register, see RC_SPECIAL_xxx. - */ - RC_FILE_SPECIAL, + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL, - /** - * Indicates this register should use the result of the presubtract - * operation. - */ - RC_FILE_PRESUB, + /** + * Indicates this register should use the result of the presubtract + * operation. + */ + RC_FILE_PRESUB, - /** - * Indicates that the source index has been encoded as a 7-bit float. - */ - RC_FILE_INLINE + /** + * Indicates that the source index has been encoded as a 7-bit float. + */ + RC_FILE_INLINE } rc_register_file; enum { - /** R500 fragment program ALU result "register" */ - RC_SPECIAL_ALU_RESULT = 0, + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, - /** Must be last */ - RC_NUM_SPECIAL_REGISTERS + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS }; #define RC_REGISTER_INDEX_BITS 11 -#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) typedef enum { - RC_SWIZZLE_X = 0, - RC_SWIZZLE_Y, - RC_SWIZZLE_Z, - RC_SWIZZLE_W, - RC_SWIZZLE_ZERO, - RC_SWIZZLE_ONE, - RC_SWIZZLE_HALF, - RC_SWIZZLE_UNUSED + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED } rc_swizzle; -static inline int is_swizzle_inline_constant(rc_swizzle swizzle){ - return swizzle >= RC_SWIZZLE_ZERO; - +static inline int +is_swizzle_inline_constant(rc_swizzle swizzle) +{ + return swizzle >= RC_SWIZZLE_ZERO; } -#define 
RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) -#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) -#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) -#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) -#define SET_SWZ(swz, idx, newv) \ - do { \ - (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ - } while(0) +#define RC_MAKE_SWIZZLE(a, b, c, d) (((a) << 0) | ((b) << 3) | ((c) << 6) | ((d) << 9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a), (a), (a), (a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx) * 3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx) * 3))) | ((newv) << ((idx) * 3)); \ + } while (0) #define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) #define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) @@ -127,70 +128,64 @@ static inline int is_swizzle_inline_constant(rc_swizzle swizzle){ */ /*@{*/ #define RC_MASK_NONE 0 -#define RC_MASK_X 1 -#define RC_MASK_Y 2 -#define RC_MASK_Z 4 -#define RC_MASK_W 8 -#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) -#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) -#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) -#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X | RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X | RC_MASK_Y | RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X | RC_MASK_Y | RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W) /*@}*/ -typedef enum { - RC_ALURESULT_NONE = 0, - RC_ALURESULT_X, - RC_ALURESULT_W -} rc_write_aluresult; +typedef enum { RC_ALURESULT_NONE = 0, RC_ALURESULT_X, RC_ALURESULT_W } rc_write_aluresult; typedef enum { - RC_PRESUB_NONE = 0, + RC_PRESUB_NONE = 0, - /** 1 - 2 * src0 */ - RC_PRESUB_BIAS, + /** 1 - 2 * 
src0 */ + RC_PRESUB_BIAS, - /** src1 - src0 */ - RC_PRESUB_SUB, + /** src1 - src0 */ + RC_PRESUB_SUB, - /** src1 + src0 */ - RC_PRESUB_ADD, + /** src1 + src0 */ + RC_PRESUB_ADD, - /** 1 - src0 */ - RC_PRESUB_INV + /** 1 - src0 */ + RC_PRESUB_INV } rc_presubtract_op; typedef enum { - RC_OMOD_MUL_1, - RC_OMOD_MUL_2, - RC_OMOD_MUL_4, - RC_OMOD_MUL_8, - RC_OMOD_DIV_2, - RC_OMOD_DIV_4, - RC_OMOD_DIV_8, - RC_OMOD_DISABLE + RC_OMOD_MUL_1, + RC_OMOD_MUL_2, + RC_OMOD_MUL_4, + RC_OMOD_MUL_8, + RC_OMOD_DIV_2, + RC_OMOD_DIV_4, + RC_OMOD_DIV_8, + RC_OMOD_DISABLE } rc_omod_op; -static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ - switch(op){ - case RC_PRESUB_BIAS: - case RC_PRESUB_INV: - return 1; - case RC_PRESUB_ADD: - case RC_PRESUB_SUB: - return 2; - default: - return 0; - } +static inline int +rc_presubtract_src_reg_count(rc_presubtract_op op) +{ + switch (op) { + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } } #define RC_SOURCE_NONE 0x0 #define RC_SOURCE_RGB 0x1 #define RC_SOURCE_ALPHA 0x2 -typedef enum { - RC_PRED_DISABLED, - RC_PRED_SET, - RC_PRED_INV -} rc_predicate_mode; +typedef enum { RC_PRED_DISABLED, RC_PRED_SET, RC_PRED_INV } rc_predicate_mode; #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c index cd063972a2e..bf08695d3ee 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_pair.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c @@ -13,205 +13,188 @@ * Return the source slot where we installed the given register access, * or -1 if no slot was free anymore. 
*/ -int rc_pair_alloc_source(struct rc_pair_instruction *pair, - unsigned int rgb, unsigned int alpha, - rc_register_file file, unsigned int index) +int +rc_pair_alloc_source(struct rc_pair_instruction *pair, unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) { - int candidate = -1; - int candidate_quality = -1; - unsigned int alpha_used = 0; - unsigned int rgb_used = 0; - int i; + int candidate = -1; + int candidate_quality = -1; + unsigned int alpha_used = 0; + unsigned int rgb_used = 0; + int i; - if ((!rgb && !alpha) || file == RC_FILE_NONE) - return 0; + if ((!rgb && !alpha) || file == RC_FILE_NONE) + return 0; - /* Make sure only one presubtract operation is used per instruction. */ - if (file == RC_FILE_PRESUB) { - if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used - && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - return -1; - } + /* Make sure only one presubtract operation is used per instruction. */ + if (file == RC_FILE_PRESUB) { + if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used && + index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } - if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used - && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - return -1; - } - } + if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used && + index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } - for(i = 0; i < 3; ++i) { - int q = 0; - if (rgb) { - if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].File != file || - pair->RGB.Src[i].Index != index) { - rgb_used++; - continue; - } - q++; - } - } - if (alpha) { - if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].File != file || - pair->Alpha.Src[i].Index != index) { - alpha_used++; - continue; - } - q++; - } - } - if (q > candidate_quality) { - candidate_quality = q; - candidate = i; - } - } + for (i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].File != file || pair->RGB.Src[i].Index 
!= index) { + rgb_used++; + continue; + } + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].File != file || pair->Alpha.Src[i].Index != index) { + alpha_used++; + continue; + } + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } - if (file == RC_FILE_PRESUB) { - candidate = RC_PAIR_PRESUB_SRC; - } else if (candidate < 0 || (rgb && rgb_used > 2) - || (alpha && alpha_used > 2)) { - return -1; - } + if (file == RC_FILE_PRESUB) { + candidate = RC_PAIR_PRESUB_SRC; + } else if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2)) { + return -1; + } - /* candidate >= 0 */ + /* candidate >= 0 */ - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].File = file; - pair->RGB.Src[candidate].Index = index; - if (candidate == RC_PAIR_PRESUB_SRC) { - /* For registers with the RC_FILE_PRESUB file, - * the index stores the presubtract op. */ - int src_regs = rc_presubtract_src_reg_count(index); - for(i = 0; i < src_regs; i++) { - pair->RGB.Src[i].Used = 1; - } - } - } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].File = file; - pair->Alpha.Src[candidate].Index = index; - if (candidate == RC_PAIR_PRESUB_SRC) { - /* For registers with the RC_FILE_PRESUB file, - * the index stores the presubtract op. */ - int src_regs = rc_presubtract_src_reg_count(index); - for(i=0; i < src_regs; i++) { - pair->Alpha.Src[i].Used = 1; - } - } - } + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. 
*/ + int src_regs = rc_presubtract_src_reg_count(index); + for (i = 0; i < src_regs; i++) { + pair->RGB.Src[i].Used = 1; + } + } + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for (i = 0; i < src_regs; i++) { + pair->Alpha.Src[i].Used = 1; + } + } + } - return candidate; + return candidate; } -static void pair_foreach_source_callback( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb, - unsigned int swz, - unsigned int src) +static void +pair_foreach_source_callback(struct rc_pair_instruction *pair, void *data, + rc_pair_foreach_src_fn cb, unsigned int swz, unsigned int src) { - /* swz > 3 means that the swizzle is either not used, or a constant - * swizzle (e.g. 0, 1, 0.5). */ - if(swz > 3) - return; + /* swz > 3 means that the swizzle is either not used, or a constant + * swizzle (e.g. 0, 1, 0.5). 
*/ + if (swz > 3) + return; - if(swz == RC_SWIZZLE_W) { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - unsigned int src_count = rc_presubtract_src_reg_count( - pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); - for(i = 0; i < src_count; i++) { - cb(data, &pair->Alpha.Src[i]); - } - } else { - cb(data, &pair->Alpha.Src[src]); - } - } else { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - unsigned int src_count = rc_presubtract_src_reg_count( - pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); - for(i = 0; i < src_count; i++) { - cb(data, &pair->RGB.Src[i]); - } - } - else { - cb(data, &pair->RGB.Src[src]); - } - } + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = + rc_presubtract_src_reg_count(pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < src_count; i++) { + cb(data, &pair->Alpha.Src[i]); + } + } else { + cb(data, &pair->Alpha.Src[src]); + } + } else { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = + rc_presubtract_src_reg_count(pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < src_count; i++) { + cb(data, &pair->RGB.Src[i]); + } + } else { + cb(data, &pair->RGB.Src[src]); + } + } } -void rc_pair_foreach_source_that_alpha_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb) +void +rc_pair_foreach_source_that_alpha_reads(struct rc_pair_instruction *pair, void *data, + rc_pair_foreach_src_fn cb) { - unsigned int i; - const struct rc_opcode_info * info = - rc_get_opcode_info(pair->Alpha.Opcode); - for(i = 0; i < info->NumSrcRegs; i++) { - pair_foreach_source_callback(pair, data, cb, - GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), - pair->Alpha.Arg[i].Source); - } + unsigned int i; + const struct rc_opcode_info *info = rc_get_opcode_info(pair->Alpha.Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + pair_foreach_source_callback(pair, data, cb, GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), + pair->Alpha.Arg[i].Source); + } } 
-void rc_pair_foreach_source_that_rgb_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb) +void +rc_pair_foreach_source_that_rgb_reads(struct rc_pair_instruction *pair, void *data, + rc_pair_foreach_src_fn cb) { - unsigned int i; - const struct rc_opcode_info * info = - rc_get_opcode_info(pair->RGB.Opcode); - for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - /* Find a swizzle that is either X,Y,Z,or W. We assume here - * that if one channel swizzles X,Y, or Z, then none of the - * other channels swizzle W, and vice-versa. */ - for(chan = 0; chan < 4; chan++) { - swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); - if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) - continue; - } - pair_foreach_source_callback(pair, data, cb, - swz, - pair->RGB.Arg[i].Source); - } + unsigned int i; + const struct rc_opcode_info *info = rc_get_opcode_info(pair->RGB.Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. 
*/ + for (chan = 0; chan < 4; chan++) { + swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z || + swz == RC_SWIZZLE_W) + continue; + } + pair_foreach_source_callback(pair, data, cb, swz, pair->RGB.Arg[i].Source); + } } -struct rc_pair_instruction_source * rc_pair_get_src( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_arg * arg) +struct rc_pair_instruction_source * +rc_pair_get_src(struct rc_pair_instruction *pair_inst, struct rc_pair_instruction_arg *arg) { - unsigned int type; + unsigned int type; - type = rc_source_type_swz(arg->Swizzle); + type = rc_source_type_swz(arg->Swizzle); - if (type & RC_SOURCE_RGB) { - return &pair_inst->RGB.Src[arg->Source]; - } else if (type & RC_SOURCE_ALPHA) { - return &pair_inst->Alpha.Src[arg->Source]; - } else { - return NULL; - } + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } } -int rc_pair_get_src_index( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_source * src) +int +rc_pair_get_src_index(struct rc_pair_instruction *pair_inst, struct rc_pair_instruction_source *src) { - int i; - for (i = 0; i < 3; i++) { - if (&pair_inst->RGB.Src[i] == src - || &pair_inst->Alpha.Src[i] == src) { - return i; - } - } - return -1; + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; } diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h index 5464873bd29..831c6bc9385 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_pair.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h @@ -12,7 +12,6 @@ struct radeon_compiler; - /** * \file * Represents a paired ALU instruction, as found in R300 and R500 @@ -34,73 +33,64 
@@ struct radeon_compiler; #define RC_PAIR_PRESUB_SRC 3 struct rc_pair_instruction_source { - unsigned int Used:1; - unsigned int File:4; - unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int Used : 1; + unsigned int File : 4; + unsigned int Index : RC_REGISTER_INDEX_BITS; }; struct rc_pair_instruction_arg { - unsigned int Source:2; - unsigned int Swizzle:12; - unsigned int Abs:1; - unsigned int Negate:1; + unsigned int Source : 2; + unsigned int Swizzle : 12; + unsigned int Abs : 1; + unsigned int Negate : 1; }; struct rc_pair_sub_instruction { - unsigned int Opcode:8; - unsigned int DestIndex:RC_REGISTER_INDEX_BITS; - unsigned int WriteMask:4; - unsigned int Target:2; - unsigned int OutputWriteMask:3; - unsigned int DepthWriteMask:1; - unsigned int Saturate:1; - unsigned int Omod:3; + unsigned int Opcode : 8; + unsigned int DestIndex : RC_REGISTER_INDEX_BITS; + unsigned int WriteMask : 4; + unsigned int Target : 2; + unsigned int OutputWriteMask : 3; + unsigned int DepthWriteMask : 1; + unsigned int Saturate : 1; + unsigned int Omod : 3; - struct rc_pair_instruction_source Src[4]; - struct rc_pair_instruction_arg Arg[3]; + struct rc_pair_instruction_source Src[4]; + struct rc_pair_instruction_arg Arg[3]; }; struct rc_pair_instruction { - struct rc_pair_sub_instruction RGB; - struct rc_pair_sub_instruction Alpha; + struct rc_pair_sub_instruction RGB; + struct rc_pair_sub_instruction Alpha; - unsigned int WriteALUResult:2; - unsigned int ALUResultCompare:3; - unsigned int Nop:1; - unsigned int SemWait:1; + unsigned int WriteALUResult : 2; + unsigned int ALUResultCompare : 3; + unsigned int Nop : 1; + unsigned int SemWait : 1; }; -typedef void (*rc_pair_foreach_src_fn) - (void *, struct rc_pair_instruction_source *); +typedef void (*rc_pair_foreach_src_fn)(void *, struct rc_pair_instruction_source *); /** * General helper functions for dealing with the paired instruction format. 
*/ /*@{*/ -int rc_pair_alloc_source(struct rc_pair_instruction *pair, - unsigned int rgb, unsigned int alpha, - rc_register_file file, unsigned int index); +int rc_pair_alloc_source(struct rc_pair_instruction *pair, unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); -void rc_pair_foreach_source_that_alpha_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb); +void rc_pair_foreach_source_that_alpha_reads(struct rc_pair_instruction *pair, void *data, + rc_pair_foreach_src_fn cb); -void rc_pair_foreach_source_that_rgb_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb); +void rc_pair_foreach_source_that_rgb_reads(struct rc_pair_instruction *pair, void *data, + rc_pair_foreach_src_fn cb); -struct rc_pair_instruction_source * rc_pair_get_src( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_arg * arg); +struct rc_pair_instruction_source *rc_pair_get_src(struct rc_pair_instruction *pair_inst, + struct rc_pair_instruction_arg *arg); -int rc_pair_get_src_index( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_source * src); +int rc_pair_get_src_index(struct rc_pair_instruction *pair_inst, + struct rc_pair_instruction_source *src); /*@}*/ - /** * Compiler passes that operate with the paired format. 
*/ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c index 281286fb16b..5e7457f1aed 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_print.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -3,457 +3,511 @@ * SPDX-License-Identifier: MIT */ -#include "radeon_program.h" #include "radeon_compiler_util.h" +#include "radeon_program.h" #include -static const char * textarget_to_string(rc_texture_target target) +static const char * +textarget_to_string(rc_texture_target target) { - switch(target) { - case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; - case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; - case RC_TEXTURE_CUBE: return "CUBE"; - case RC_TEXTURE_3D: return "3D"; - case RC_TEXTURE_RECT: return "RECT"; - case RC_TEXTURE_2D: return "2D"; - case RC_TEXTURE_1D: return "1D"; - default: return "BAD_TEXTURE_TARGET"; - } + switch (target) { + case RC_TEXTURE_2D_ARRAY: + return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: + return "1D_ARRAY"; + case RC_TEXTURE_CUBE: + return "CUBE"; + case RC_TEXTURE_3D: + return "3D"; + case RC_TEXTURE_RECT: + return "RECT"; + case RC_TEXTURE_2D: + return "2D"; + case RC_TEXTURE_1D: + return "1D"; + default: + return "BAD_TEXTURE_TARGET"; + } } -static const char * presubtract_op_to_string(rc_presubtract_op op) +static const char * +presubtract_op_to_string(rc_presubtract_op op) { - switch(op) { - case RC_PRESUB_NONE: - return "NONE"; - case RC_PRESUB_BIAS: - return "(1 - 2 * src0)"; - case RC_PRESUB_SUB: - return "(src1 - src0)"; - case RC_PRESUB_ADD: - return "(src1 + src0)"; - case RC_PRESUB_INV: - return "(1 - src0)"; - default: - return "BAD_PRESUBTRACT_OP"; - } + switch (op) { + case RC_PRESUB_NONE: + return "NONE"; + case RC_PRESUB_BIAS: + return "(1 - 2 * src0)"; + case RC_PRESUB_SUB: + return "(src1 - src0)"; + case RC_PRESUB_ADD: + return "(src1 + src0)"; + case RC_PRESUB_INV: + return "(1 - src0)"; + default: + return 
"BAD_PRESUBTRACT_OP"; + } } -static void print_omod_op(FILE * f, rc_omod_op op) +static void +print_omod_op(FILE *f, rc_omod_op op) { - const char * omod_str; + const char *omod_str; - switch(op) { - case RC_OMOD_MUL_1: - return; - case RC_OMOD_DISABLE: - omod_str = "(OMOD DISABLE)"; - break; - case RC_OMOD_MUL_2: - omod_str = "* 2"; - break; - case RC_OMOD_MUL_4: - omod_str = "* 4"; - break; - case RC_OMOD_MUL_8: - omod_str = "* 8"; - break; - case RC_OMOD_DIV_2: - omod_str = "/ 2"; - break; - case RC_OMOD_DIV_4: - omod_str = "/ 4"; - break; - case RC_OMOD_DIV_8: - omod_str = "/ 8"; - break; - default: - return; - } - fprintf(f, " %s", omod_str); + switch (op) { + case RC_OMOD_MUL_1: + return; + case RC_OMOD_DISABLE: + omod_str = "(OMOD DISABLE)"; + break; + case RC_OMOD_MUL_2: + omod_str = "* 2"; + break; + case RC_OMOD_MUL_4: + omod_str = "* 4"; + break; + case RC_OMOD_MUL_8: + omod_str = "* 8"; + break; + case RC_OMOD_DIV_2: + omod_str = "/ 2"; + break; + case RC_OMOD_DIV_4: + omod_str = "/ 4"; + break; + case RC_OMOD_DIV_8: + omod_str = "/ 8"; + break; + default: + return; + } + fprintf(f, " %s", omod_str); } -static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +static void +rc_print_comparefunc(FILE *f, const char *lhs, rc_compare_func func, const char *rhs) { - if (func == RC_COMPARE_FUNC_NEVER) { - fprintf(f, "false"); - } else if (func == RC_COMPARE_FUNC_ALWAYS) { - fprintf(f, "true"); - } else { - const char * op; - switch(func) { - case RC_COMPARE_FUNC_LESS: op = "<"; break; - case RC_COMPARE_FUNC_EQUAL: op = "=="; break; - case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; - case RC_COMPARE_FUNC_GREATER: op = ">"; break; - case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; - case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; - default: op = "???"; break; - } - fprintf(f, "%s %s %s", lhs, op, rhs); - } + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + 
fprintf(f, "true"); + } else { + const char *op; + switch (func) { + case RC_COMPARE_FUNC_LESS: + op = "<"; + break; + case RC_COMPARE_FUNC_EQUAL: + op = "=="; + break; + case RC_COMPARE_FUNC_LEQUAL: + op = "<="; + break; + case RC_COMPARE_FUNC_GREATER: + op = ">"; + break; + case RC_COMPARE_FUNC_NOTEQUAL: + op = "!="; + break; + case RC_COMPARE_FUNC_GEQUAL: + op = ">="; + break; + default: + op = "???"; + break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } } -static void rc_print_inline_float(FILE * f, int index) +static void +rc_print_inline_float(FILE *f, int index) { - fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index); + fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index); } -static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +static void +rc_print_register(FILE *f, rc_register_file file, int index, unsigned int reladdr) { - if (file == RC_FILE_NONE) { - fprintf(f, "none"); - } else if (file == RC_FILE_SPECIAL) { - switch(index) { - case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; - default: fprintf(f, "special[%i]", index); break; - } - } else if (file == RC_FILE_INLINE) { - rc_print_inline_float(f, index); - } else { - const char * filename; - switch(file) { - case RC_FILE_TEMPORARY: filename = "temp"; break; - case RC_FILE_INPUT: filename = "input"; break; - case RC_FILE_OUTPUT: filename = "output"; break; - case RC_FILE_ADDRESS: filename = "addr"; break; - case RC_FILE_CONSTANT: filename = "const"; break; - default: filename = "BAD FILE"; break; - } - fprintf(f, "%s[%i%s]", filename, index, reladdr ? 
" + addr[0]" : ""); - } + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch (index) { + case RC_SPECIAL_ALU_RESULT: + fprintf(f, "aluresult"); + break; + default: + fprintf(f, "special[%i]", index); + break; + } + } else if (file == RC_FILE_INLINE) { + rc_print_inline_float(f, index); + } else { + const char *filename; + switch (file) { + case RC_FILE_TEMPORARY: + filename = "temp"; + break; + case RC_FILE_INPUT: + filename = "input"; + break; + case RC_FILE_OUTPUT: + filename = "output"; + break; + case RC_FILE_ADDRESS: + filename = "addr"; + break; + case RC_FILE_CONSTANT: + filename = "const"; + break; + default: + filename = "BAD FILE"; + break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } } -static void rc_print_mask(FILE * f, unsigned int mask) +static void +rc_print_mask(FILE *f, unsigned int mask) { - if (mask & RC_MASK_X) fprintf(f, "x"); - if (mask & RC_MASK_Y) fprintf(f, "y"); - if (mask & RC_MASK_Z) fprintf(f, "z"); - if (mask & RC_MASK_W) fprintf(f, "w"); + if (mask & RC_MASK_X) + fprintf(f, "x"); + if (mask & RC_MASK_Y) + fprintf(f, "y"); + if (mask & RC_MASK_Z) + fprintf(f, "z"); + if (mask & RC_MASK_W) + fprintf(f, "w"); } -static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +static void +rc_print_dst_register(FILE *f, struct rc_dst_register dst) { - rc_print_register(f, dst.File, dst.Index, 0); - if (dst.WriteMask != RC_MASK_XYZW) { - fprintf(f, "."); - rc_print_mask(f, dst.WriteMask); - } + rc_print_register(f, dst.File, dst.Index, 0); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } } -static char rc_swizzle_char(unsigned int swz) +static char +rc_swizzle_char(unsigned int swz) { - switch(swz) { - case RC_SWIZZLE_X: return 'x'; - case RC_SWIZZLE_Y: return 'y'; - case RC_SWIZZLE_Z: return 'z'; - case RC_SWIZZLE_W: return 'w'; - case RC_SWIZZLE_ZERO: return '0'; - case RC_SWIZZLE_ONE: return '1'; - 
case RC_SWIZZLE_HALF: return 'H'; - case RC_SWIZZLE_UNUSED: return '_'; - } - fprintf(stderr, "bad swz: %u\n", swz); - return '?'; + switch (swz) { + case RC_SWIZZLE_X: + return 'x'; + case RC_SWIZZLE_Y: + return 'y'; + case RC_SWIZZLE_Z: + return 'z'; + case RC_SWIZZLE_W: + return 'w'; + case RC_SWIZZLE_ZERO: + return '0'; + case RC_SWIZZLE_ONE: + return '1'; + case RC_SWIZZLE_HALF: + return 'H'; + case RC_SWIZZLE_UNUSED: + return '_'; + } + fprintf(stderr, "bad swz: %u\n", swz); + return '?'; } -static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +static void +rc_print_swizzle(FILE *f, unsigned int swizzle, unsigned int negate) { - unsigned int comp; - for(comp = 0; comp < 4; ++comp) { - rc_swizzle swz = GET_SWZ(swizzle, comp); - if (GET_BIT(negate, comp)) - fprintf(f, "-"); - fprintf(f, "%c", rc_swizzle_char(swz)); - } + unsigned int comp; + for (comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } } -static void rc_print_presub_instruction(FILE * f, - struct rc_presub_instruction inst) +static void +rc_print_presub_instruction(FILE *f, struct rc_presub_instruction inst) { - fprintf(f,"("); - switch(inst.Opcode){ - case RC_PRESUB_BIAS: - fprintf(f, "1 - 2 * "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_SUB: - rc_print_register(f, inst.SrcReg[1].File, - inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); - fprintf(f, " - "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_ADD: - rc_print_register(f, inst.SrcReg[1].File, - inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); - fprintf(f, " + "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_INV: - fprintf(f, "1 - "); - rc_print_register(f, inst.SrcReg[0].File, - 
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - default: - break; - } - fprintf(f, ")"); + fprintf(f, "("); + switch (inst.Opcode) { + case RC_PRESUB_BIAS: + fprintf(f, "1 - 2 * "); + rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_SUB: + rc_print_register(f, inst.SrcReg[1].File, inst.SrcReg[1].Index, inst.SrcReg[1].RelAddr); + fprintf(f, " - "); + rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_ADD: + rc_print_register(f, inst.SrcReg[1].File, inst.SrcReg[1].Index, inst.SrcReg[1].RelAddr); + fprintf(f, " + "); + rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_INV: + fprintf(f, "1 - "); + rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr); + break; + default: + break; + } + fprintf(f, ")"); } -static void rc_print_src_register(FILE * f, struct rc_instruction * inst, - struct rc_src_register src) +static void +rc_print_src_register(FILE *f, struct rc_instruction *inst, struct rc_src_register src) { - int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); - if (src.Negate == RC_MASK_XYZW) - fprintf(f, "-"); - if (src.Abs) - fprintf(f, "|"); + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); - if(src.File == RC_FILE_PRESUB) - rc_print_presub_instruction(f, inst->U.I.PreSub); - else - rc_print_register(f, src.File, src.Index, src.RelAddr); + if (src.File == RC_FILE_PRESUB) + rc_print_presub_instruction(f, inst->U.I.PreSub); + else + rc_print_register(f, src.File, src.Index, src.RelAddr); - if (src.Abs && !trivial_negate) - fprintf(f, "|"); + if (src.Abs && !trivial_negate) + fprintf(f, "|"); - if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { - fprintf(f, "."); - 
rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); - } + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } - if (src.Abs && trivial_negate) - fprintf(f, "|"); + if (src.Abs && trivial_negate) + fprintf(f, "|"); } -static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth) +static unsigned +update_branch_depth(rc_opcode opcode, unsigned *branch_depth) { - switch (opcode) { - case RC_OPCODE_IF: - case RC_OPCODE_BGNLOOP: - return (*branch_depth)++ * 2; + switch (opcode) { + case RC_OPCODE_IF: + case RC_OPCODE_BGNLOOP: + return (*branch_depth)++ * 2; - case RC_OPCODE_ENDIF: - case RC_OPCODE_ENDLOOP: - assert(*branch_depth > 0); - return --(*branch_depth) * 2; + case RC_OPCODE_ENDIF: + case RC_OPCODE_ENDLOOP: + assert(*branch_depth > 0); + return --(*branch_depth) * 2; - case RC_OPCODE_ELSE: - assert(*branch_depth > 0); - return (*branch_depth - 1) * 2; + case RC_OPCODE_ELSE: + assert(*branch_depth > 0); + return (*branch_depth - 1) * 2; - default: - return *branch_depth * 2; - } + default: + return *branch_depth * 2; + } } -static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) +static void +rc_print_normal_instruction(FILE *f, struct rc_instruction *inst, unsigned *branch_depth) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int reg; - unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); - fprintf(f, "%s", opcode->Name); + fprintf(f, "%s", opcode->Name); - switch(inst->U.I.SaturateMode) { - case RC_SATURATE_NONE: break; - case 
RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; - case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; - default: fprintf(f, "_BAD_SAT"); break; - } + switch (inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: + break; + case RC_SATURATE_ZERO_ONE: + fprintf(f, "_SAT"); + break; + case RC_SATURATE_MINUS_PLUS_ONE: + fprintf(f, "_SAT2"); + break; + default: + fprintf(f, "_BAD_SAT"); + break; + } - if (opcode->HasDstReg) { - fprintf(f, " "); - rc_print_dst_register(f, inst->U.I.DstReg); - print_omod_op(f, inst->U.I.Omod); - if (opcode->NumSrcRegs) - fprintf(f, ","); - } + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + print_omod_op(f, inst->U.I.Omod); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } - for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { - if (reg > 0) - fprintf(f, ","); - fprintf(f, " "); - rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); - } + for (reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); + } - if (opcode->HasTexture) { - fprintf(f, ", %s%s[%u]%s%s", - textarget_to_string(inst->U.I.TexSrcTarget), - inst->U.I.TexShadow ? "SHADOW" : "", - inst->U.I.TexSrcUnit, - inst->U.I.TexSemWait ? " SEM_WAIT" : "", - inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : ""); - } + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]%s%s", textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", inst->U.I.TexSrcUnit, + inst->U.I.TexSemWait ? " SEM_WAIT" : "", + inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : ""); + } - fprintf(f, ";"); + fprintf(f, ";"); - if (inst->U.I.WriteALUResult) { - fprintf(f, " [aluresult = ("); - rc_print_comparefunc(f, - (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? 
"x" : "w", - inst->U.I.ALUResultCompare, "0"); - fprintf(f, ")]"); - } + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } - if (inst->U.I.DstReg.Pred == RC_PRED_SET) { - fprintf(f, " PRED_SET"); - } else if (inst->U.I.DstReg.Pred == RC_PRED_INV) { - fprintf(f, " PRED_INV"); - } + if (inst->U.I.DstReg.Pred == RC_PRED_SET) { + fprintf(f, " PRED_SET"); + } else if (inst->U.I.DstReg.Pred == RC_PRED_INV) { + fprintf(f, " PRED_INV"); + } - fprintf(f, "\n"); + fprintf(f, "\n"); } -static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) +static void +rc_print_pair_instruction(FILE *f, struct rc_instruction *fullinst, unsigned *branch_depth) { - struct rc_pair_instruction * inst = &fullinst->U.P; - int printedsrc = 0; - unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? - inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); + struct rc_pair_instruction *inst = &fullinst->U.P; + int printedsrc = 0; + unsigned spaces = update_branch_depth( + inst->RGB.Opcode != RC_OPCODE_NOP ? 
inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - if (printedsrc) - fprintf(f, ", "); - fprintf(f, "src%i.xyz = ", src); - rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); - printedsrc = 1; - } - if (inst->Alpha.Src[src].Used) { - if (printedsrc) - fprintf(f, ", "); - fprintf(f, "src%i.w = ", src); - rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); - printedsrc = 1; - } - } - if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - fprintf(f, ", srcp.xyz = %s", - presubtract_op_to_string( - inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); - } - if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - fprintf(f, ", srcp.w = %s", - presubtract_op_to_string( - inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); - } - if (inst->SemWait) { - fprintf(f, " SEM_WAIT"); - } - fprintf(f, "\n"); + for (unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.xyz = %s", + presubtract_op_to_string(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.w = %s", + presubtract_op_to_string(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if (inst->SemWait) { + fprintf(f, " SEM_WAIT"); + } + fprintf(f, "\n"); - if (inst->RGB.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); 
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->RGB.Opcode); - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); - fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - if (inst->WriteALUResult == RC_ALURESULT_X) - fprintf(f, " aluresult"); + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf( + f, " color[%i].%s%s%s", inst->RGB.Target, (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); - print_omod_op(f, inst->RGB.Omod); + print_omod_op(f, inst->RGB.Omod); - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[arg].Negate ? 
"-" : ""; - fprintf(f, ", %s%ssrc", neg, abs); - if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) - fprintf(f,"p"); - else - fprintf(f,"%d", inst->RGB.Arg[arg].Source); - fprintf(f,".%c%c%c%s", - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), - abs); - } - fprintf(f, "\n"); - } + for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char *abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char *neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if (inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f, "p"); + else + fprintf(f, "%d", inst->RGB.Arg[arg].Source); + fprintf(f, ".%c%c%c%s", rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), abs); + } + fprintf(f, "\n"); + } - if (inst->Alpha.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Alpha.Opcode); - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); - fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - fprintf(f, " color[%i].w", inst->Alpha.Target); - if (inst->Alpha.DepthWriteMask) - fprintf(f, " depth.w"); - if (inst->WriteALUResult == RC_ALURESULT_W) - fprintf(f, " aluresult"); + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? 
"_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color[%i].w", inst->Alpha.Target); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); - print_omod_op(f, inst->Alpha.Omod); + print_omod_op(f, inst->Alpha.Omod); - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; - fprintf(f, ", %s%ssrc", neg, abs); - if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) - fprintf(f,"p"); - else - fprintf(f,"%d", inst->Alpha.Arg[arg].Source); - fprintf(f,".%c%s", - rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); - } - fprintf(f, "\n"); - } + for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char *abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char *neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if (inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f, "p"); + else + fprintf(f, "%d", inst->Alpha.Arg[arg].Source); + fprintf(f, ".%c%s", rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); + } + fprintf(f, "\n"); + } - if (inst->WriteALUResult) { - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); + if (inst->WriteALUResult) { + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); - fprintf(f, " [aluresult = ("); - rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); - fprintf(f, ")]\n"); - } + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } } /** * Print program to stderr, default options. 
*/ -void rc_print_program(const struct rc_program *prog) +void +rc_print_program(const struct rc_program *prog) { - unsigned int linenum = 0; - unsigned branch_depth = 0; - struct rc_instruction *inst; + unsigned int linenum = 0; + unsigned branch_depth = 0; + struct rc_instruction *inst; - fprintf(stderr, "# Radeon Compiler Program\n"); + fprintf(stderr, "# Radeon Compiler Program\n"); - for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { - fprintf(stderr, "%3d: ", linenum); + for (inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); - if (inst->Type == RC_INSTRUCTION_PAIR) - rc_print_pair_instruction(stderr, inst, &branch_depth); - else - rc_print_normal_instruction(stderr, inst, &branch_depth); + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst, &branch_depth); + else + rc_print_normal_instruction(stderr, inst, &branch_depth); - linenum++; - } + linenum++; + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c index d7d760dc5f3..1c884372159 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_tex.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c @@ -10,83 +10,79 @@ /* Series of transformations to be done on textures. 
*/ -static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, - int tmu) +static struct rc_src_register +shadow_fail_value(struct r300_fragment_program_compiler *compiler, int tmu) { - struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + struct rc_src_register reg = {0, 0, 0, 0, 0, 0}; - reg.File = RC_FILE_NONE; - reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000, - compiler->state.unit[tmu].texture_swizzle); - return reg; + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000, compiler->state.unit[tmu].texture_swizzle); + return reg; } -static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, - int tmu) +static struct rc_src_register +shadow_pass_value(struct r300_fragment_program_compiler *compiler, int tmu) { - struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + struct rc_src_register reg = {0, 0, 0, 0, 0, 0}; - reg.File = RC_FILE_NONE; - reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, - compiler->state.unit[tmu].texture_swizzle); - return reg; + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, compiler->state.unit[tmu].texture_swizzle); + return reg; } -static void scale_texcoords(struct r300_fragment_program_compiler *compiler, - struct rc_instruction *inst, - unsigned state_constant) +static void +scale_texcoords(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst, + unsigned state_constant) { - struct rc_instruction *inst_mov; + struct rc_instruction *inst_mov; - unsigned temp = rc_find_free_temporary(&compiler->Base); + unsigned temp = rc_find_free_temporary(&compiler->Base); - inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MUL; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - 
inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mov->U.I.SrcReg[1].Index = - rc_constants_add_state(&compiler->Base.Program.Constants, - state_constant, inst->U.I.TexSrcUnit); + inst_mov->U.I.Opcode = RC_OPCODE_MUL; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mov->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants, + state_constant, inst->U.I.TexSrcUnit); - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; } -static void projective_divide(struct r300_fragment_program_compiler *compiler, - struct rc_instruction *inst) +static void +projective_divide(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst) { - struct rc_instruction *inst_mul, *inst_rcp; + struct rc_instruction *inst_mul, *inst_rcp; - unsigned temp = rc_find_free_temporary(&compiler->Base); + unsigned temp = rc_find_free_temporary(&compiler->Base); - inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = temp; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - /* Because the input can be arbitrarily swizzled, - * read the component mapped to W. 
*/ - inst_rcp->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = temp; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Because the input can be arbitrarily swizzled, + * read the component mapped to W. */ + inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); - inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = temp; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = temp; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = temp; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.Opcode = RC_OPCODE_TEX; - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; } /** @@ -97,353 +93,343 @@ static void projective_divide(struct r300_fragment_program_compiler *compiler, * - extract operand swizzles * - introduce a temporary register when write masks are needed */ -int radeonTransformTEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) +int 
+radeonTransformTEX(struct radeon_compiler *c, struct rc_instruction *inst, void *data) { - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; - int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT; + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)data; + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT; - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_TXD && - inst->U.I.Opcode != RC_OPCODE_TXL && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; + if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - ((c->Program.ShadowSamplers & (1U << inst->U.I.TexSrcUnit)) || - (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1U << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - 
inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); - } else { - inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); - } + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + } else { + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + } - return 1; - } else { - struct rc_instruction * inst_rcp = NULL; - struct rc_instruction *inst_mul, *inst_add, *inst_cmp; - unsigned tmp_texsample; - unsigned tmp_sum; - int pass, fail; + return 1; + } else { + struct rc_instruction *inst_rcp = NULL; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; + unsigned tmp_texsample; + unsigned tmp_sum; + int pass, fail; - /* Save the output register. */ - struct rc_dst_register output_reg = inst->U.I.DstReg; - unsigned saturate_mode = inst->U.I.SaturateMode; + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; - /* Redirect TEX to a new temp. */ - tmp_texsample = rc_find_free_temporary(c); - inst->U.I.SaturateMode = 0; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = tmp_texsample; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - tmp_sum = rc_find_free_temporary(c); + tmp_sum = rc_find_free_temporary(c); - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - /* Compute 1/W. 
*/ - inst_rcp = rc_insert_new_instruction(c, inst); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tmp_sum; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); - } + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_sum; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + } - /* Divide Z by W (if it's TXP) and saturate. */ - inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); - inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = tmp_sum; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = tmp_sum; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - } + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? 
RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + } - /* Add the depth texture value. */ - inst_add = rc_insert_new_instruction(c, inst_mul); - inst_add->U.I.Opcode = RC_OPCODE_ADD; - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = tmp_sum; - inst_add->U.I.DstReg.WriteMask = RC_MASK_W; - inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[0].Index = tmp_sum; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[1].Index = tmp_texsample; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + /* Add the depth texture value. 
*/ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; - /* Note that SrcReg[0] is r, SrcReg[1] is tex and: - * LESS: r < tex <=> -tex+r < 0 - * GEQUAL: r >= tex <=> not (-tex+r < 0) - * GREATER: r > tex <=> tex-r < 0 - * LEQUAL: r <= tex <=> not ( tex-r < 0) - * EQUAL: GEQUAL - * NOTEQUAL:LESS - */ + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ - /* This negates either r or tex: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || - comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) - inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; - else - inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; + else + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - /* This negates the whole expression: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || - comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { - pass = 1; - fail = 2; - } else { - pass = 
2; - fail = 1; - } + /* This negates the whole expression: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } - inst_cmp = rc_insert_new_instruction(c, inst_add); - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - inst_cmp->U.I.SaturateMode = saturate_mode; - inst_cmp->U.I.DstReg = output_reg; - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = tmp_sum; - inst_cmp->U.I.SrcReg[0].Swizzle = - combine_swizzles(RC_SWIZZLE_WWWW, - compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); - inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); - inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp = rc_insert_new_instruction(c, inst_add); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = combine_swizzles( + RC_SWIZZLE_WWWW, compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); - assert(tmp_texsample != tmp_sum); - } - } + assert(tmp_texsample != tmp_sum); + } + } - /* R300 cannot sample from rectangles and the wrap mode fallback needs - * normalized coordinates anyway. */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { - scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); - inst->U.I.TexSrcTarget = RC_TEXTURE_2D; - } + /* R300 cannot sample from rectangles and the wrap mode fallback needs + * normalized coordinates anyway. 
*/ + if (inst->U.I.Opcode != RC_OPCODE_KIL && is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { + scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } - /* Divide by W if needed. */ - if (inst->U.I.Opcode == RC_OPCODE_TXP && - (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || - compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { - projective_divide(compiler, inst); - } + /* Divide by W if needed. */ + if (inst->U.I.Opcode == RC_OPCODE_TXP && + (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { + projective_divide(compiler, inst); + } - /* Texture wrap modes don't work on NPOT textures. - * - * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and - * mirroring are not. If we need to repeat, we do: - * - * MUL temp, texcoord, - * FRC temp, temp ; Discard integer portion of coords - * - * This gives us coords in [0, 1]. - * - * Mirroring is trickier. We're going to start out like repeat: - * - * MUL temp, texcoord, ; De-mirror across axes - * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] - * ; so scale to [0, 1] - * FRC temp, temp ; Make the pattern repeat - * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] - * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. - * ; The pattern is backwards, so reverse it (1-x). - * - * This gives us coords in [0, 1]. - * - * ~ C & M. ;) - */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - wrapmode != RC_WRAP_NONE) { - struct rc_instruction *inst_mov; - unsigned temp = rc_find_free_temporary(c); + /* Texture wrap modes don't work on NPOT textures. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. 
If we need to repeat, we do: + * + * MUL temp, texcoord, + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); - if (wrapmode == RC_WRAP_REPEAT) { - /* Both instructions will be paired up. */ - struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. 
*/ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); - inst_frc->U.I.Opcode = RC_OPCODE_FRC; - inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_frc->U.I.DstReg.Index = temp; - inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { - /* - * Function: - * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) - * - * Code: - * MUL temp, src0, 0.5 - * FRC temp, temp - * MAD temp, temp, 2, -1 - * ADD temp, 1, -abs(temp) - */ + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ - struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; - unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; - inst_mul = rc_insert_new_instruction(c, inst->Prev); + inst_mul = rc_insert_new_instruction(c, inst->Prev); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = temp; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; - inst_frc = rc_insert_new_instruction(c, inst->Prev); + inst_frc = rc_insert_new_instruction(c, inst->Prev); - inst_frc->U.I.Opcode = RC_OPCODE_FRC; - 
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_frc->U.I.DstReg.Index = temp; - inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_frc->U.I.SrcReg[0].Index = temp; - inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); - inst_mad = rc_insert_new_instruction(c, inst->Prev); + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = temp; - inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[0].Index = temp; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Index = two; - inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; - inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; - 
inst_add = rc_insert_new_instruction(c, inst->Prev); + inst_add = rc_insert_new_instruction(c, inst->Prev); - inst_add->U.I.Opcode = RC_OPCODE_ADD; - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = temp; - inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[1].Index = temp; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; - inst_add->U.I.SrcReg[1].Abs = 1; - inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; - } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { - /* - * Mirrored clamp modes are bloody simple, we just use abs - * to mirror [0, 1] into [-1, 0]. This works for - * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. - */ - struct rc_instruction *inst_mov; + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. 
+ */ + struct rc_instruction *inst_mov; - inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mov->U.I.SrcReg[0].Abs = 1; - } + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } - /* Preserve W for TXP/TXB. */ - inst_mov = rc_insert_new_instruction(c, inst->Prev); + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; - } + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } - /* NPOT -> POT conversion for 3D textures. */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { - struct rc_instruction *inst_mov; - unsigned temp = rc_find_free_temporary(c); + /* NPOT -> POT conversion for 3D textures. 
*/ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); - /* Saturate XYZ. */ - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Saturate XYZ. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - /* Copy W. */ - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Copy W. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; - scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); - } + scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); + } - /* Cannot write texture to output registers or with saturate (all chips), - * or with masks (non-r500). 
*/ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst->U.I.SaturateMode || - (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.SaturateMode || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->U.I.SaturateMode = 0; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = 
RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } - return 1; + return 1; } diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h index 4845c71edd7..17bd43eeaf3 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_tex.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h @@ -9,9 +9,6 @@ #include "radeon_compiler.h" #include "radeon_program.h" -int radeonTransformTEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data); +int radeonTransformTEX(struct radeon_compiler *c, struct rc_instruction *inst, void *data); #endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_regalloc.c index ef3d7fdde46..083b3151cd8 100644 --- a/src/gallium/drivers/r300/compiler/radeon_regalloc.c +++ b/src/gallium/drivers/r300/compiler/radeon_regalloc.c @@ -9,435 +9,494 @@ #define VERBOSE 0 -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) +#define DBG(...) 
\ + do { \ + if (VERBOSE) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) -const struct rc_class rc_class_list_vp [] = { - {RC_REG_CLASS_VP_SINGLE, 4, - {RC_MASK_X, - RC_MASK_Y, - RC_MASK_Z, - RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_VP_DOUBLE, 6, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_X | RC_MASK_Z, - RC_MASK_X | RC_MASK_W, - RC_MASK_Y | RC_MASK_Z, - RC_MASK_Y | RC_MASK_W, - RC_MASK_Z | RC_MASK_W}}, - {RC_REG_CLASS_VP_TRIPLE, 4, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, - RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_VP_QUADRUPLE, 1, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}} -}; +const struct rc_class rc_class_list_vp[] = { + { + RC_REG_CLASS_VP_SINGLE, + 4, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z, + RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_VP_DOUBLE, + 6, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}, + }, + { + RC_REG_CLASS_VP_TRIPLE, + 4, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_VP_QUADRUPLE, + 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }}; -const struct rc_class rc_class_list_fp [] = { - {RC_REG_CLASS_FP_SINGLE, 3, - {RC_MASK_X, - RC_MASK_Y, - RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_DOUBLE, 3, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_X | RC_MASK_Z, - RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_TRIPLE, 1, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - 
RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_ALPHA, 1, - {RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3, - {RC_MASK_X | RC_MASK_W, - RC_MASK_Y | RC_MASK_W, - RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_X, 1, - {RC_MASK_X, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_Y, 1, - {RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_Z, 1, - {RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_XY, 1, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_YZ, 1, - {RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_XZ, 1, - {RC_MASK_X | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_XW, 1, - {RC_MASK_X | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_YW, 1, - {RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_ZW, 1, - {RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_XYW, 1, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - 
RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_YZW, 1, - {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_FP_XZW, 1, - {RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE, - RC_MASK_NONE}} -}; +const struct rc_class rc_class_list_fp[] = { + { + RC_REG_CLASS_FP_SINGLE, + 3, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_DOUBLE, + 3, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_TRIPLE, + 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_ALPHA, + 1, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, + 3, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, + 3, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, + 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_X, + 1, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_Y, + 1, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_Z, + 1, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_XY, + 1, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + 
RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_YZ, + 1, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_XZ, + 1, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_XW, + 1, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_YW, + 1, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_ZW, + 1, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_XYW, + 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_YZW, + 1, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }, + { + RC_REG_CLASS_FP_XZW, + 1, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE, + RC_MASK_NONE}, + }}; -static void print_live_intervals(struct live_intervals * src) +static void +print_live_intervals(struct live_intervals *src) { - if (!src || !src->Used) { - DBG("(null)"); - return; - } + if (!src || !src->Used) { + DBG("(null)"); + return; + } - DBG("(%i,%i)", src->Start, src->End); + DBG("(%i,%i)", src->Start, src->End); } -static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) +static int +overlap_live_intervals(struct live_intervals *a, struct live_intervals *b) { - if (VERBOSE) { - DBG("overlap_live_intervals: "); - print_live_intervals(a); - DBG(" to "); - print_live_intervals(b); - DBG("\n"); - } + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(a); + DBG(" to "); + 
print_live_intervals(b); + DBG("\n"); + } - if (!a->Used || !b->Used) { - DBG(" unused interval\n"); - return 0; - } + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; + } - if (a->Start > b->Start) { - if (a->Start < b->End) { - DBG(" overlap\n"); - return 1; - } - } else if (b->Start > a->Start) { - if (b->Start < a->End) { - DBG(" overlap\n"); - return 1; - } - } else { /* a->Start == b->Start */ - if (a->Start != a->End && b->Start != b->End) { - DBG(" overlap\n"); - return 1; - } - } + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { + DBG(" overlap\n"); + return 1; + } + } - DBG(" no overlap\n"); + DBG(" no overlap\n"); - return 0; + return 0; } -int rc_find_class( - const struct rc_class * classes, - unsigned int writemask, - unsigned int max_writemask_count) +int +rc_find_class(const struct rc_class *classes, unsigned int writemask, + unsigned int max_writemask_count) { - unsigned int i; - for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) { - unsigned int j; - if (classes[i].WritemaskCount > max_writemask_count) { - continue; - } - for (j = 0; j < classes[i].WritemaskCount; j++) { - if (classes[i].Writemasks[j] == writemask) { - return i; - } - } - } - return -1; + unsigned int i; + for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { + continue; + } + for (j = 0; j < classes[i].WritemaskCount; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; } -unsigned int rc_overlap_live_intervals_array( - struct live_intervals * a, - struct live_intervals * b) +unsigned int +rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b) { - unsigned int a_chan, b_chan; - for (a_chan = 0; a_chan < 4; 
a_chan++) { - for (b_chan = 0; b_chan < 4; b_chan++) { - if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { - return 1; - } - } - } - return 0; + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } + } + return 0; } #if VERBOSE -static void print_reg(int reg) +static void +print_reg(int reg) { - unsigned int index = reg_get_index(reg); - unsigned int mask = reg_get_writemask(reg); - fprintf(stderr, "Temp[%u].%c%c%c%c", index, - mask & RC_MASK_X ? 'x' : '_', - mask & RC_MASK_Y ? 'y' : '_', - mask & RC_MASK_Z ? 'z' : '_', - mask & RC_MASK_W ? 'w' : '_'); + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); } #endif -static void add_register_conflicts( - struct ra_regs * regs, - unsigned int max_temp_regs) +static void +add_register_conflicts(struct ra_regs *regs, unsigned int max_temp_regs) { - unsigned int index, a_mask, b_mask; - for (index = 0; index < max_temp_regs; index++) { - for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { - for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; - b_mask++) { - if (a_mask & b_mask) { - ra_add_reg_conflict(regs, - get_reg_id(index, a_mask), - get_reg_id(index, b_mask)); - } - } - } - } + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for (a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, get_reg_id(index, a_mask), get_reg_id(index, b_mask)); + } + } + } + } } -void rc_build_interference_graph( - struct ra_graph * graph, - struct rc_list * variables) +void +rc_build_interference_graph(struct ra_graph *graph, struct rc_list 
*variables) { - unsigned node_index; - struct rc_list * var_ptr; + unsigned node_index; + struct rc_list *var_ptr; - /* Build the interference graph */ - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - struct rc_list * a, * b; - unsigned int b_index; + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_list *a, *b; + unsigned int b_index; - for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; - b; b = b->Next, b_index++) { - struct rc_variable * var_a = a->Item; - while (var_a) { - struct rc_variable * var_b = b->Item; - while (var_b) { - if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) { - ra_add_node_interference(graph, - node_index, b_index); - } - var_b = var_b->Friend; - } - var_a = var_a->Friend; - } - } - } + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; b; b = b->Next, b_index++) { + struct rc_variable *var_a = a->Item; + while (var_a) { + struct rc_variable *var_b = b->Item; + while (var_b) { + if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } + } } -void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog) +void +rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog) { - unsigned i, j, index, class_count, max_temps; - unsigned **ra_q_values; + unsigned i, j, index, class_count, max_temps; + unsigned **ra_q_values; - /* Pre-computed q values. This array describes the maximum number of - * a class's [row] registers that are in conflict with a single - * register from another class [column]. - * - * For example: - * q_values[0][2] is 3, because a register from class 2 - * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from - * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. 
T0.xyz conflicts with T0.x, T0.y, - * and T0.z. - * - * q_values[2][0] is 1, because a register from class 0 - * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from - * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz - * - * The q values for each register class [row] will never be greater - * than the maximum number of writemask combinations for that class. - * - * For example: - * - * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination, - * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater - * than 1. - */ - const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = { - {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}, - {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3}, - {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}, - {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3}, - {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1}, - {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, - {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1}, - {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1}, - {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, - {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} - }; + /* Pre-computed q values. This array describes the maximum number of + * a class's [row] registers that are in conflict with a single + * register from another class [column]. 
+ * + * For example: + * q_values[0][2] is 3, because a register from class 2 + * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from + * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y, + * and T0.z. + * + * q_values[2][0] is 1, because a register from class 0 + * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from + * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz + * + * The q values for each register class [row] will never be greater + * than the maximum number of writemask combinations for that class. + * + * For example: + * + * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination, + * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater + * than 1. + */ + const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = { + {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}, + {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}, + {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3}, + {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1}, + {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, + {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1}}; - const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = { - {1, 2, 3, 4}, - {3, 5, 6, 6}, - {3, 4, 4, 4}, - {1, 1, 1, 1} - }; + const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {{1, 2, 3, 4}, + {3, 5, 6, 6}, + {3, 4, 4, 4}, + {1, 1, 1, 1}}; - if (prog == RC_FRAGMENT_PROGRAM) { - s->class_list = rc_class_list_fp; - class_count = RC_REG_CLASS_FP_COUNT; - max_temps = R500_PFS_NUM_TEMP_REGS; - } else { - s->class_list = rc_class_list_vp; - class_count = RC_REG_CLASS_VP_COUNT; - max_temps = R300_VS_MAX_TEMPS; - } + if (prog == RC_FRAGMENT_PROGRAM) { + s->class_list = rc_class_list_fp; + class_count = RC_REG_CLASS_FP_COUNT; + max_temps = R500_PFS_NUM_TEMP_REGS; + } else { + s->class_list = rc_class_list_vp; + class_count = RC_REG_CLASS_VP_COUNT; + max_temps = R300_VS_MAX_TEMPS; + } - /* Allocate the main ra data structure */ - s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, - true); + /* Allocate the main ra data structure */ + s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, true); - /* Create the register classes */ - for (i = 0; i < class_count; i++) { - const struct rc_class *class = &s->class_list[i]; - s->classes[class->ID] = ra_alloc_reg_class(s->regs); + /* Create the register classes */ + for (i = 0; i < class_count; i++) { + const struct rc_class *class = &s->class_list[i]; + s->classes[class->ID] = ra_alloc_reg_class(s->regs); - /* Assign registers to the classes */ - for (index = 0; index < max_temps; index++) { - for (j = 0; j < class->WritemaskCount; j++) { - int reg_id = get_reg_id(index, - class->Writemasks[j]); - ra_class_add_reg(s->classes[class->ID], reg_id); - } - } - } + /* Assign registers to the classes */ + for (index = 0; index < max_temps; index++) { + for (j = 0; j < class->WritemaskCount; j++) { + int reg_id = get_reg_id(index, class->Writemasks[j]); + ra_class_add_reg(s->classes[class->ID], reg_id); + } + } + } - /* Set the q values. 
The q_values array is indexed based on - * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be - * different than the ID assigned to that class by ra. - * This why we need to manually construct this list. - */ - ra_q_values = MALLOC(class_count * sizeof(unsigned *)); + /* Set the q values. The q_values array is indexed based on + * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be + * different than the ID assigned to that class by ra. + * This why we need to manually construct this list. + */ + ra_q_values = MALLOC(class_count * sizeof(unsigned *)); - for (i = 0; i < class_count; i++) { - ra_q_values[i] = MALLOC(class_count * sizeof(unsigned)); - for (j = 0; j < class_count; j++) { - if (prog == RC_FRAGMENT_PROGRAM) - ra_q_values[i][j] = q_values_fp[i][j]; - else - ra_q_values[i][j] = q_values_vp[i][j]; - } - } + for (i = 0; i < class_count; i++) { + ra_q_values[i] = MALLOC(class_count * sizeof(unsigned)); + for (j = 0; j < class_count; j++) { + if (prog == RC_FRAGMENT_PROGRAM) + ra_q_values[i][j] = q_values_fp[i][j]; + else + ra_q_values[i][j] = q_values_vp[i][j]; + } + } - /* Add register conflicts */ - add_register_conflicts(s->regs, max_temps); + /* Add register conflicts */ + add_register_conflicts(s->regs, max_temps); - ra_set_finalize(s->regs, ra_q_values); + ra_set_finalize(s->regs, ra_q_values); - for (i = 0; i < class_count; i++) { - FREE(ra_q_values[i]); - } - FREE(ra_q_values); + for (i = 0; i < class_count; i++) { + FREE(ra_q_values[i]); + } + FREE(ra_q_values); } -void rc_destroy_regalloc_state(struct rc_regalloc_state *s) +void +rc_destroy_regalloc_state(struct rc_regalloc_state *s) { - ralloc_free(s->regs); + ralloc_free(s->regs); } diff --git a/src/gallium/drivers/r300/compiler/radeon_regalloc.h b/src/gallium/drivers/r300/compiler/radeon_regalloc.h index 09f823daa7a..cc6c2e8035a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_regalloc.h +++ b/src/gallium/drivers/r300/compiler/radeon_regalloc.h @@ -9,114 +9,111 @@ #ifndef 
RADEON_REGALLOC_H #define RADEON_REGALLOC_H +#include "util/ralloc.h" #include "util/register_allocate.h" #include "util/u_memory.h" -#include "util/ralloc.h" #include "radeon_variable.h" struct ra_regs; enum rc_reg_class { - RC_REG_CLASS_FP_SINGLE, - RC_REG_CLASS_FP_DOUBLE, - RC_REG_CLASS_FP_TRIPLE, - RC_REG_CLASS_FP_ALPHA, - RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, - RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, - RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, - RC_REG_CLASS_FP_X, - RC_REG_CLASS_FP_Y, - RC_REG_CLASS_FP_Z, - RC_REG_CLASS_FP_XY, - RC_REG_CLASS_FP_YZ, - RC_REG_CLASS_FP_XZ, - RC_REG_CLASS_FP_XW, - RC_REG_CLASS_FP_YW, - RC_REG_CLASS_FP_ZW, - RC_REG_CLASS_FP_XYW, - RC_REG_CLASS_FP_YZW, - RC_REG_CLASS_FP_XZW, - RC_REG_CLASS_FP_COUNT + RC_REG_CLASS_FP_SINGLE, + RC_REG_CLASS_FP_DOUBLE, + RC_REG_CLASS_FP_TRIPLE, + RC_REG_CLASS_FP_ALPHA, + RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_FP_X, + RC_REG_CLASS_FP_Y, + RC_REG_CLASS_FP_Z, + RC_REG_CLASS_FP_XY, + RC_REG_CLASS_FP_YZ, + RC_REG_CLASS_FP_XZ, + RC_REG_CLASS_FP_XW, + RC_REG_CLASS_FP_YW, + RC_REG_CLASS_FP_ZW, + RC_REG_CLASS_FP_XYW, + RC_REG_CLASS_FP_YZW, + RC_REG_CLASS_FP_XZW, + RC_REG_CLASS_FP_COUNT }; enum rc_reg_class_vp { - RC_REG_CLASS_VP_SINGLE, - RC_REG_CLASS_VP_DOUBLE, - RC_REG_CLASS_VP_TRIPLE, - RC_REG_CLASS_VP_QUADRUPLE, - RC_REG_CLASS_VP_COUNT + RC_REG_CLASS_VP_SINGLE, + RC_REG_CLASS_VP_DOUBLE, + RC_REG_CLASS_VP_TRIPLE, + RC_REG_CLASS_VP_QUADRUPLE, + RC_REG_CLASS_VP_COUNT }; struct rc_regalloc_state { - struct ra_regs *regs; - struct ra_class *classes[RC_REG_CLASS_FP_COUNT]; - const struct rc_class *class_list; + struct ra_regs *regs; + struct ra_class *classes[RC_REG_CLASS_FP_COUNT]; + const struct rc_class *class_list; }; struct register_info { - struct live_intervals Live[4]; + struct live_intervals Live[4]; - unsigned int Used:1; - unsigned int Allocated:1; - unsigned int File:3; - unsigned int Index:RC_REGISTER_INDEX_BITS; - unsigned int 
Writemask; + unsigned int Used : 1; + unsigned int Allocated : 1; + unsigned int File : 3; + unsigned int Index : RC_REGISTER_INDEX_BITS; + unsigned int Writemask; }; struct regalloc_state { - struct radeon_compiler * C; + struct radeon_compiler *C; - struct register_info * Input; - unsigned int NumInputs; + struct register_info *Input; + unsigned int NumInputs; - struct register_info * Temporary; - unsigned int NumTemporaries; + struct register_info *Temporary; + unsigned int NumTemporaries; - unsigned int Simple; - int LoopEnd; + unsigned int Simple; + int LoopEnd; }; struct rc_class { - enum rc_reg_class ID; + enum rc_reg_class ID; - unsigned int WritemaskCount; + unsigned int WritemaskCount; - /** List of writemasks that belong to this class */ - unsigned int Writemasks[6]; + /** List of writemasks that belong to this class */ + unsigned int Writemasks[6]; }; -int rc_find_class( - const struct rc_class * classes, - unsigned int writemask, - unsigned int max_writemask_count); +int rc_find_class(const struct rc_class *classes, unsigned int writemask, + unsigned int max_writemask_count); -unsigned int rc_overlap_live_intervals_array( - struct live_intervals * a, - struct live_intervals * b); +unsigned int rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b); -static inline unsigned int reg_get_index(int reg) +static inline unsigned int +reg_get_index(int reg) { - return reg / RC_MASK_XYZW; + return reg / RC_MASK_XYZW; }; -static inline unsigned int reg_get_writemask(int reg) +static inline unsigned int +reg_get_writemask(int reg) { - return (reg % RC_MASK_XYZW) + 1; + return (reg % RC_MASK_XYZW) + 1; }; -static inline int get_reg_id(unsigned int index, unsigned int writemask) +static inline int +get_reg_id(unsigned int index, unsigned int writemask) { - assert(writemask); - if (writemask == 0) { - return 0; - } - return (index * RC_MASK_XYZW) + (writemask - 1); + assert(writemask); + if (writemask == 0) { + return 0; + } + return 
(index * RC_MASK_XYZW) + (writemask - 1); } -void rc_build_interference_graph( - struct ra_graph * graph, - struct rc_list * variables); +void rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables); void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog); void rc_destroy_regalloc_state(struct rc_regalloc_state *s); diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c index dc3e782e8ad..0db8eb9c75c 100644 --- a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c @@ -3,278 +3,279 @@ * SPDX-License-Identifier: MIT */ -#include -#include #include "radeon_remove_constants.h" -#include "radeon_dataflow.h" +#include +#include #include "util/bitscan.h" +#include "radeon_dataflow.h" struct const_remap_state { - /* Used when emiting shaders constants. */ - struct const_remap *remap_table; - /* Used when rewritign registers */ - struct const_remap *inv_remap_table; - /* Old costant layout. */ - struct rc_constant *constants; - /* New constant layout. */ - struct rc_constant_list new_constants; - /* Marks immediates that are used as a vector. Those will be just copied. */ - uint8_t *is_used_as_vector; - bool has_rel_addr; - bool are_externals_remapped; - bool is_identity; + /* Used when emiting shaders constants. */ + struct const_remap *remap_table; + /* Used when rewritign registers */ + struct const_remap *inv_remap_table; + /* Old costant layout. */ + struct rc_constant *constants; + /* New constant layout. */ + struct rc_constant_list new_constants; + /* Marks immediates that are used as a vector. Those will be just copied. 
*/ + uint8_t *is_used_as_vector; + bool has_rel_addr; + bool are_externals_remapped; + bool is_identity; }; -static void remap_regs(struct rc_instruction *inst, - struct const_remap *inv_remap_table) +static void +remap_regs(struct rc_instruction *inst, struct const_remap *inv_remap_table) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) { - if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT) - continue; - unsigned old_index = inst->U.I.SrcReg[src].Index; - for (unsigned chan = 0; chan < 4; chan++) { - unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); - if (old_swz <= RC_SWIZZLE_W) { - inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz]; - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, - inv_remap_table[old_index].swizzle[old_swz]); - } - } - } + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT) + continue; + unsigned old_index = inst->U.I.SrcReg[src].Index; + for (unsigned chan = 0; chan < 4; chan++) { + unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + if (old_swz <= RC_SWIZZLE_W) { + inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz]; + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + inv_remap_table[old_index].swizzle[old_swz]); + } + } + } } -static void mark_used(void * userdata, struct rc_instruction * inst, - struct rc_src_register * src) +static void +mark_used(void *userdata, struct rc_instruction *inst, struct rc_src_register *src) { - struct const_remap_state* d = userdata; + struct const_remap_state *d = userdata; - if (src->File == RC_FILE_CONSTANT) { - uint8_t mask = 0; - if (src->RelAddr) { - d->has_rel_addr = true; - } else { - for (unsigned chan = 0; chan < 4; chan++) { - char swz = GET_SWZ(src->Swizzle, chan); - if (swz > RC_SWIZZLE_W) - continue; - mask 
|= 1 << swz; - } - } - d->constants[src->Index].UseMask |= mask; - if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE && - util_bitcount(mask) > 1) { - d->is_used_as_vector[src->Index] |= mask; - } - } + if (src->File == RC_FILE_CONSTANT) { + uint8_t mask = 0; + if (src->RelAddr) { + d->has_rel_addr = true; + } else { + for (unsigned chan = 0; chan < 4; chan++) { + char swz = GET_SWZ(src->Swizzle, chan); + if (swz > RC_SWIZZLE_W) + continue; + mask |= 1 << swz; + } + } + d->constants[src->Index].UseMask |= mask; + if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE && util_bitcount(mask) > 1) { + d->is_used_as_vector[src->Index] |= mask; + } + } } -static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i) +static void +place_constant_in_free_slot(struct const_remap_state *s, unsigned i) { - unsigned count = s->new_constants.Count; - for (unsigned chan = 0; chan < 4; chan++) { - s->inv_remap_table[i].index[chan] = count; - s->inv_remap_table[i].swizzle[chan] = chan; - if (s->constants[i].UseMask & (1 << chan)) { - s->remap_table[count].index[chan] = i; - s->remap_table[count].swizzle[chan] = chan; - } - } - s->new_constants.Constants[count] = s->constants[i]; + unsigned count = s->new_constants.Count; + for (unsigned chan = 0; chan < 4; chan++) { + s->inv_remap_table[i].index[chan] = count; + s->inv_remap_table[i].swizzle[chan] = chan; + if (s->constants[i].UseMask & (1 << chan)) { + s->remap_table[count].index[chan] = i; + s->remap_table[count].swizzle[chan] = chan; + } + } + s->new_constants.Constants[count] = s->constants[i]; - if (count != i) { - if (s->constants[i].Type == RC_CONSTANT_EXTERNAL) - s->are_externals_remapped = true; - s->is_identity = false; - } - s->new_constants.Count++; + if (count != i) { + if (s->constants[i].Type == RC_CONSTANT_EXTERNAL) + s->are_externals_remapped = true; + s->is_identity = false; + } + s->new_constants.Count++; } -static void place_immediate_in_free_slot(struct const_remap_state *s, 
unsigned i) +static void +place_immediate_in_free_slot(struct const_remap_state *s, unsigned i) { - assert(util_bitcount(s->is_used_as_vector[i]) > 1); + assert(util_bitcount(s->is_used_as_vector[i]) > 1); - unsigned count = s->new_constants.Count; + unsigned count = s->new_constants.Count; - s->new_constants.Constants[count] = s->constants[i]; - s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i]; - for (unsigned chan = 0; chan < 4; chan++) { - if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) { - s->inv_remap_table[i].index[chan] = count; - s->inv_remap_table[i].swizzle[chan] = chan; - } - } - if (count != i) { - s->is_identity = false; - } - s->new_constants.Count++; + s->new_constants.Constants[count] = s->constants[i]; + s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i]; + for (unsigned chan = 0; chan < 4; chan++) { + if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) { + s->inv_remap_table[i].index[chan] = count; + s->inv_remap_table[i].swizzle[chan] = chan; + } + } + if (count != i) { + s->is_identity = false; + } + s->new_constants.Count++; } -static void try_merge_constants_external(struct const_remap_state *s, unsigned i) +static void +try_merge_constants_external(struct const_remap_state *s, unsigned i) { - assert(util_bitcount(s->constants[i].UseMask) == 1); - for (unsigned j = 0; j < s->new_constants.Count; j++) { - for (unsigned chan = 0; chan < 4; chan++) { - if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) { - /* Writemask to swizzle */ - unsigned swizzle = 0; - for (; swizzle < 4; swizzle++) - if (s->constants[i].UseMask >> swizzle == 1) - break; - /* Update the remap tables. 
*/ - s->remap_table[j].index[chan] = i; - s->remap_table[j].swizzle[chan] = swizzle; - s->inv_remap_table[i].index[swizzle] = j; - s->inv_remap_table[i].swizzle[swizzle] = chan; - s->are_externals_remapped = true; - s->is_identity = false; - return; - } - } - } - place_constant_in_free_slot(s, i); + assert(util_bitcount(s->constants[i].UseMask) == 1); + for (unsigned j = 0; j < s->new_constants.Count; j++) { + for (unsigned chan = 0; chan < 4; chan++) { + if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) { + /* Writemask to swizzle */ + unsigned swizzle = 0; + for (; swizzle < 4; swizzle++) + if (s->constants[i].UseMask >> swizzle == 1) + break; + /* Update the remap tables. */ + s->remap_table[j].index[chan] = i; + s->remap_table[j].swizzle[chan] = swizzle; + s->inv_remap_table[i].index[swizzle] = j; + s->inv_remap_table[i].swizzle[swizzle] = chan; + s->are_externals_remapped = true; + s->is_identity = false; + return; + } + } + } + place_constant_in_free_slot(s, i); } -static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s) +static void +init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s) { - s->is_identity = true; - s->is_used_as_vector = malloc(c->Program.Constants.Count); - s->new_constants.Constants = - malloc(sizeof(struct rc_constant) * c->Program.Constants.Count); - s->new_constants._Reserved = c->Program.Constants.Count; - s->constants = c->Program.Constants.Constants; - memset(s->is_used_as_vector, 0, c->Program.Constants.Count); + s->is_identity = true; + s->is_used_as_vector = malloc(c->Program.Constants.Count); + s->new_constants.Constants = malloc(sizeof(struct rc_constant) * c->Program.Constants.Count); + s->new_constants._Reserved = c->Program.Constants.Count; + s->constants = c->Program.Constants.Constants; + memset(s->is_used_as_vector, 0, c->Program.Constants.Count); - s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap)); - s->inv_remap_table 
= - malloc(c->Program.Constants.Count * sizeof(struct const_remap)); - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - /* Clear the UseMask, we will update it later. */ - s->constants[i].UseMask = 0; - for (unsigned swz = 0; swz < 4; swz++) { - s->remap_table[i].index[swz] = -1; - s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED; - } - } + s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap)); + s->inv_remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap)); + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + /* Clear the UseMask, we will update it later. */ + s->constants[i].UseMask = 0; + for (unsigned swz = 0; swz < 4; swz++) { + s->remap_table[i].index[swz] = -1; + s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED; + } + } } -void rc_remove_unused_constants(struct radeon_compiler *c, void *user) +void +rc_remove_unused_constants(struct radeon_compiler *c, void *user) { - struct const_remap **out_remap_table = (struct const_remap **)user; - struct rc_constant *constants = c->Program.Constants.Constants; - struct const_remap_state remap_state = {}; - struct const_remap_state *s = &remap_state; + struct const_remap **out_remap_table = (struct const_remap **)user; + struct rc_constant *constants = c->Program.Constants.Constants; + struct const_remap_state remap_state = {}; + struct const_remap_state *s = &remap_state; - if (!c->Program.Constants.Count) { - *out_remap_table = NULL; - return; - } + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } - init_constant_remap_state(c, s); + init_constant_remap_state(c, s); - /* Pass 1: Mark used constants. */ - for (struct rc_instruction *inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - rc_for_all_reads_src(inst, mark_used, s); - } + /* Pass 1: Mark used constants. 
*/ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_src(inst, mark_used, s); + } - /* Pass 2: If there is relative addressing or dead constant elimination - * is disabled, mark all externals as used. */ - if (s->has_rel_addr || !c->remove_unused_constants) { - for (unsigned i = 0; i < c->Program.Constants.Count; i++) - if (constants[i].Type == RC_CONSTANT_EXTERNAL) - s->constants[i].UseMask = RC_MASK_XYZW; - } + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (s->has_rel_addr || !c->remove_unused_constants) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + s->constants[i].UseMask = RC_MASK_XYZW; + } + /* Pass 3: Make the remapping table and remap constants. + * First iterate over used vec2, vec3 and vec4 externals and place them in a free + * slots. While we could in theory merge 2 vec2 together, its not worth it + * as we would have to a) check that the swizzle is valid, b) transforming + * xy to zw would mean we need rgb and alpha source slot, thus it would hurt + * us potentially during pair scheduling. */ + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (constants[i].Type != RC_CONSTANT_EXTERNAL) + continue; + if (util_bitcount(s->constants[i].UseMask) > 1) { + place_constant_in_free_slot(s, i); + } + } - /* Pass 3: Make the remapping table and remap constants. - * First iterate over used vec2, vec3 and vec4 externals and place them in a free - * slots. While we could in theory merge 2 vec2 together, its not worth it - * as we would have to a) check that the swizzle is valid, b) transforming - * xy to zw would mean we need rgb and alpha source slot, thus it would hurt - * us potentially during pair scheduling. 
*/ - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (constants[i].Type != RC_CONSTANT_EXTERNAL) - continue; - if (util_bitcount(s->constants[i].UseMask) > 1) { - place_constant_in_free_slot(s, i); - } - } + /* Now iterate over scalarar externals and put them into empty slots. */ + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (constants[i].Type != RC_CONSTANT_EXTERNAL) + continue; + if (util_bitcount(s->constants[i].UseMask) == 1) + try_merge_constants_external(s, i); + } - /* Now iterate over scalarar externals and put them into empty slots. */ - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (constants[i].Type != RC_CONSTANT_EXTERNAL) - continue; - if (util_bitcount(s->constants[i].UseMask) == 1) - try_merge_constants_external(s, i); - } + /* Now put immediates which are used as vectors. */ + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (constants[i].Type == RC_CONSTANT_IMMEDIATE && + util_bitcount(s->constants[i].UseMask) > 0 && + util_bitcount(s->is_used_as_vector[i]) > 0) { + place_immediate_in_free_slot(s, i); + } + } - /* Now put immediates which are used as vectors. */ - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (constants[i].Type == RC_CONSTANT_IMMEDIATE && - util_bitcount(s->constants[i].UseMask) > 0 && - util_bitcount(s->is_used_as_vector[i]) > 0) { - place_immediate_in_free_slot(s, i); - } - } + /* Now walk over scalar immediates and try to: + * a) check for duplicates, + * b) find free slot. + * All of this is already done by rc_constants_add_immediate_scalar, + * so just use it. 
+ */ + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (constants[i].Type != RC_CONSTANT_IMMEDIATE) + continue; + for (unsigned chan = 0; chan < 4; chan++) { + if ((s->constants[i].UseMask) & (1 << chan) && + (~(s->is_used_as_vector[i]) & (1 << chan))) { + unsigned swz; + s->inv_remap_table[i].index[chan] = rc_constants_add_immediate_scalar( + &s->new_constants, constants[i].u.Immediate[chan], &swz); + s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0); + s->is_identity = false; + } + } + } - /* Now walk over scalar immediates and try to: - * a) check for duplicates, - * b) find free slot. - * All of this is already done by rc_constants_add_immediate_scalar, - * so just use it. - */ - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (constants[i].Type != RC_CONSTANT_IMMEDIATE) - continue; - for (unsigned chan = 0; chan < 4; chan++) { - if ((s->constants[i].UseMask) & (1 << chan) && - (~(s->is_used_as_vector[i]) & (1 << chan))) { - unsigned swz; - s->inv_remap_table[i].index[chan] = - rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz); - s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0); - s->is_identity = false; - } - } - } + /* Finally place state constants. */ + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (constants[i].Type != RC_CONSTANT_STATE) + continue; + if (util_bitcount(s->constants[i].UseMask) > 0) { + place_constant_in_free_slot(s, i); + } + } - /* Finally place state constants. 
*/ - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (constants[i].Type != RC_CONSTANT_STATE) - continue; - if (util_bitcount(s->constants[i].UseMask) > 0) { - place_constant_in_free_slot(s, i); - } - } + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped)); - /* is_identity ==> new_count == old_count - * !is_identity ==> new_count < old_count */ - assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped)); + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!s->is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + remap_regs(inst, s->inv_remap_table); + } + } - /* Pass 4: Redirect reads of all constants to their new locations. */ - if (!s->is_identity) { - for (struct rc_instruction *inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - remap_regs(inst, s->inv_remap_table); - } - } + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + rc_constants_destroy(&c->Program.Constants); + c->Program.Constants = s->new_constants; - /* Set the new constant count. Note that new_count may be less than - * Count even though the remapping function is identity. In that case, - * the constants have been removed at the end of the array. 
*/ - rc_constants_destroy(&c->Program.Constants); - c->Program.Constants = s->new_constants; + if (s->are_externals_remapped) { + *out_remap_table = s->remap_table; + } else { + *out_remap_table = NULL; + free(s->remap_table); + } - if (s->are_externals_remapped) { - *out_remap_table = s->remap_table; - } else { - *out_remap_table = NULL; - free(s->remap_table); - } + free(s->inv_remap_table); - free(s->inv_remap_table); - - if (c->Debug & RC_DBG_LOG) - rc_constants_print(&c->Program.Constants, s->remap_table); + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants, s->remap_table); } diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c index 3633b1c0a08..de40d62641a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c @@ -20,38 +20,37 @@ * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ -void rc_rename_regs(struct radeon_compiler *c, void *user) +void +rc_rename_regs(struct radeon_compiler *c, void *user) { - struct rc_instruction * inst; - struct rc_list * variables; - struct rc_list * var_ptr; + struct rc_instruction *inst; + struct rc_list *variables; + struct rc_list *var_ptr; - /* XXX Remove this once the register allocation works with flow control. */ - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) - return; - } + /* XXX Remove this once the register allocation works with flow control. 
*/ + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } - variables = rc_get_variables(c); + variables = rc_get_variables(c); - for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { - int new_index; - unsigned writemask; - struct rc_variable * var = var_ptr->Item; + for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { + int new_index; + unsigned writemask; + struct rc_variable *var = var_ptr->Item; - if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { - continue; - } + if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + continue; + } - new_index = rc_find_free_temporary(c); - if (new_index < 0) { - rc_error(c, "Ran out of temporary registers\n"); - return; - } + new_index = rc_find_free_temporary(c); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } - writemask = rc_variable_writemask_sum(var); - rc_variable_change_dst(var, new_index, writemask); - } + writemask = rc_variable_writemask_sum(var); + rc_variable_change_dst(var, new_index, writemask); + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h index 905ea8f7352..d3cfd5b439f 100644 --- a/src/gallium/drivers/r300/compiler/radeon_swizzle.h +++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h @@ -9,27 +9,27 @@ #include "radeon_program.h" struct rc_swizzle_split { - unsigned char NumPhases; - unsigned char Phase[4]; + unsigned char NumPhases; + unsigned char Phase[4]; }; /** * Describe the swizzling capability of target hardware. */ struct rc_swizzle_caps { - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. 
- * - * \return 1 if the swizzle is native for the given opcode - */ - int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); - /** - * Determine how to split access to the masked channels of the - * given source register to obtain ALU-native swizzles. - */ - void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split *split); }; extern const struct rc_swizzle_caps r300_vertprog_swizzle_caps; diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c index d1565922ccd..5cd9485094e 100644 --- a/src/gallium/drivers/r300/compiler/radeon_variable.c +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -3,9 +3,9 @@ * SPDX-License-Identifier: MIT */ +#include "radeon_variable.h" #include #include -#include "radeon_variable.h" #include "memory_pool.h" #include "radeon_compiler_util.h" @@ -19,314 +19,283 @@ * and its friends to new_index and new_writemask. This function also takes * care of rewriting the swizzles for the sources of var. 
*/ -void rc_variable_change_dst( - struct rc_variable * var, - unsigned int new_index, - unsigned int new_writemask) +void +rc_variable_change_dst(struct rc_variable *var, unsigned int new_index, unsigned int new_writemask) { - struct rc_variable * var_ptr; - struct rc_list * readers; - unsigned int old_mask = rc_variable_writemask_sum(var); - unsigned int conversion_swizzle = - rc_make_conversion_swizzle(old_mask, new_writemask); + struct rc_variable *var_ptr; + struct rc_list *readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = rc_make_conversion_swizzle(old_mask, new_writemask); - for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { - if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { - rc_normal_rewrite_writemask(var_ptr->Inst, - conversion_swizzle); - var_ptr->Inst->U.I.DstReg.Index = new_index; - } else { - struct rc_pair_sub_instruction * sub; - if (var_ptr->Dst.WriteMask == RC_MASK_W) { - assert(new_writemask & RC_MASK_W); - sub = &var_ptr->Inst->U.P.Alpha; - } else { - sub = &var_ptr->Inst->U.P.RGB; - rc_pair_rewrite_writemask(sub, - conversion_swizzle); - } - sub->DestIndex = new_index; - } - } + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction *sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, conversion_swizzle); + } + sub->DestIndex = new_index; + } + } - readers = rc_variable_readers_union(var); + readers = rc_variable_readers_union(var); - for ( ; readers; readers = readers->Next) { - struct rc_reader * reader = readers->Item; - if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { - reader->U.I.Src->Index = new_index; - 
reader->U.I.Src->Swizzle = rc_rewrite_swizzle( - reader->U.I.Src->Swizzle, conversion_swizzle); - } else { - struct rc_pair_instruction * pair_inst = - &reader->Inst->U.P; - unsigned int src_type = rc_source_type_swz( - reader->U.P.Arg->Swizzle); + for (; readers; readers = readers->Next) { + struct rc_reader *reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = + rc_rewrite_swizzle(reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction *pair_inst = &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz(reader->U.P.Arg->Swizzle); - int src_index = reader->U.P.Arg->Source; - if (src_index == RC_PAIR_PRESUB_SRC) { - src_index = rc_pair_get_src_index( - pair_inst, reader->U.P.Src); - } - rc_pair_remove_src(reader->Inst, src_type, - src_index); - /* Reuse the source index of the source that - * was just deleted and set its register - * index. We can't use rc_pair_alloc_source - * for this because it might return a source - * index that is already being used. 
*/ - if (src_type & RC_SOURCE_RGB) { - pair_inst->RGB.Src[src_index] - .Used = 1; - pair_inst->RGB.Src[src_index] - .Index = new_index; - pair_inst->RGB.Src[src_index] - .File = RC_FILE_TEMPORARY; - } - if (src_type & RC_SOURCE_ALPHA) { - pair_inst->Alpha.Src[src_index] - .Used = 1; - pair_inst->Alpha.Src[src_index] - .Index = new_index; - pair_inst->Alpha.Src[src_index] - .File = RC_FILE_TEMPORARY; - } - reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( - reader->U.P.Arg->Swizzle, conversion_swizzle); - if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { - reader->U.P.Arg->Source = src_index; - } - } - } + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index(pair_inst, reader->U.P.Src); + } + rc_pair_remove_src(reader->Inst, src_type, src_index); + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this because it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index].Used = 1; + pair_inst->RGB.Src[src_index].Index = new_index; + pair_inst->RGB.Src[src_index].File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index].Used = 1; + pair_inst->Alpha.Src[src_index].Index = new_index; + pair_inst->Alpha.Src[src_index].File = RC_FILE_TEMPORARY; + } + reader->U.P.Arg->Swizzle = + rc_rewrite_swizzle(reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } } /** * Compute the live intervals for var and its friends. 
*/ -void rc_variable_compute_live_intervals(struct rc_variable * var) +void +rc_variable_compute_live_intervals(struct rc_variable *var) { - while(var) { - unsigned int i; - unsigned int start = var->Inst->IP; + while (var) { + unsigned int i; + unsigned int start = var->Inst->IP; - for (i = 0; i < var->ReaderCount; i++) { - unsigned int chan; - unsigned int chan_start = start; - unsigned int chan_end = var->Readers[i].Inst->IP; - unsigned int mask = var->Readers[i].WriteMask; - struct rc_instruction * inst; + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction *inst; - /* Extend the live interval of T0 to the start of the - * loop for sequences like: - * BGNLOOP - * read T0 - * ... - * write T0 - * ENDLOOP - */ - if (var->Readers[i].Inst->IP < start) { - struct rc_instruction * bgnloop = - rc_match_endloop(var->Readers[i].Inst); - chan_start = bgnloop->IP; - } + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... + * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction *bgnloop = rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } - /* Extend the live interval of T0 to the start of the - * loop in case there is a BRK instruction in the loop - * (we don't actually check for a BRK instruction we - * assume there is one somewhere in the loop, which - * there usually is) for sequences like: - * BGNLOOP - * ... - * conditional BRK - * ... - * write T0 - * ENDLOOP - * read T0 - *************************************************** - * Extend the live interval of T0 to the end of the - * loop for sequences like: - * write T0 - * BGNLOOP - * ... 
- * read T0 - * ENDLOOP - */ - for (inst = var->Inst; inst != var->Readers[i].Inst; - inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_ENDLOOP) { - struct rc_instruction * bgnloop = - rc_match_endloop(inst); - if (bgnloop->IP < chan_start) { - chan_start = bgnloop->IP; - } - } else if (op == RC_OPCODE_BGNLOOP) { - struct rc_instruction * endloop = - rc_match_bgnloop(inst); - if (endloop->IP > chan_end) { - chan_end = endloop->IP; - } - } - } + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... 
+ * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction *bgnloop = rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction *endloop = rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } - for (chan = 0; chan < 4; chan++) { - if ((mask >> chan) & 0x1) { - if (!var->Live[chan].Used - || chan_start < var->Live[chan].Start) { - var->Live[chan].Start = - chan_start; - } - if (!var->Live[chan].Used - || chan_end > var->Live[chan].End) { - var->Live[chan].End = chan_end; - } - var->Live[chan].Used = 1; - } - } - } - var = var->Friend; - } + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = chan_start; + } + if (!var->Live[chan].Used || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } } /** * @return 1 if a and b share a reader * @return 0 if they do not */ -static unsigned int readers_intersect( - struct rc_variable * a, - struct rc_variable * b) +static unsigned int +readers_intersect(struct rc_variable *a, struct rc_variable *b) { - unsigned int a_index, b_index; - for (a_index = 0; a_index < a->ReaderCount; a_index++) { - struct rc_reader reader_a = a->Readers[a_index]; - for (b_index = 0; b_index < b->ReaderCount; b_index++) { - struct rc_reader reader_b = b->Readers[b_index]; - if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL - && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL - && reader_a.U.I.Src == reader_b.U.I.Src) { + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; 
b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL && + reader_b.Inst->Type == RC_INSTRUCTION_NORMAL && reader_a.U.I.Src == reader_b.U.I.Src) { - return 1; - } - if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR - && reader_b.Inst->Type == RC_INSTRUCTION_PAIR - && reader_a.U.P.Src == reader_b.U.P.Src) { + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR && + reader_b.Inst->Type == RC_INSTRUCTION_PAIR && reader_a.U.P.Src == reader_b.U.P.Src) { - return 1; - } - } - } - return 0; + return 1; + } + } + } + return 0; } -void rc_variable_add_friend( - struct rc_variable * var, - struct rc_variable * friend) +void +rc_variable_add_friend(struct rc_variable *var, struct rc_variable *friend) { - assert(var->Dst.Index == friend->Dst.Index); - while(var->Friend) { - var = var->Friend; - } - var->Friend = friend; + assert(var->Dst.Index == friend->Dst.Index); + while (var->Friend) { + var = var->Friend; + } + var->Friend = friend; } -struct rc_variable * rc_variable( - struct radeon_compiler * c, - unsigned int DstFile, - unsigned int DstIndex, - unsigned int DstWriteMask, - struct rc_reader_data * reader_data) +struct rc_variable * +rc_variable(struct radeon_compiler *c, unsigned int DstFile, unsigned int DstIndex, + unsigned int DstWriteMask, struct rc_reader_data *reader_data) { - struct rc_variable * new = - memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); - memset(new, 0, sizeof(struct rc_variable)); - new->C = c; - new->Dst.File = DstFile; - new->Dst.Index = DstIndex; - new->Dst.WriteMask = DstWriteMask; - if (reader_data) { - new->Inst = reader_data->Writer; - new->ReaderCount = reader_data->ReaderCount; - new->Readers = reader_data->Readers; - } - return new; + struct rc_variable *new = memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + 
new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; } -static void get_variable_helper( - struct rc_list ** variable_list, - struct rc_variable * variable) +static void +get_variable_helper(struct rc_list **variable_list, struct rc_variable *variable) { - struct rc_list * list_ptr; - for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { - struct rc_variable * var; - for (var = list_ptr->Item; var; var = var->Friend) { - if (readers_intersect(var, variable)) { - rc_variable_add_friend(var, variable); - return; - } - } - } - rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); + struct rc_list *list_ptr; + for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable *var; + for (var = list_ptr->Item; var; var = var->Friend) { + if (readers_intersect(var, variable)) { + rc_variable_add_friend(var, variable); + return; + } + } + } + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); } -static void get_variable_pair_helper( - struct rc_list ** variable_list, - struct radeon_compiler * c, - struct rc_instruction * inst, - struct rc_pair_sub_instruction * sub_inst) +static void +get_variable_pair_helper(struct rc_list **variable_list, struct radeon_compiler *c, + struct rc_instruction *inst, struct rc_pair_sub_instruction *sub_inst) { - struct rc_reader_data reader_data; - struct rc_variable * new_var; - rc_register_file file; - unsigned int writemask; + struct rc_reader_data reader_data; + struct rc_variable *new_var; + rc_register_file file; + unsigned int writemask; - if (sub_inst->Opcode == RC_OPCODE_NOP) { - return; - } - memset(&reader_data, 0, sizeof(struct rc_reader_data)); - rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct 
rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); - if (reader_data.ReaderCount == 0) { - return; - } + if (reader_data.ReaderCount == 0) { + return; + } - if (sub_inst->WriteMask) { - file = RC_FILE_TEMPORARY; - writemask = sub_inst->WriteMask; - } else if (sub_inst->OutputWriteMask) { - file = RC_FILE_OUTPUT; - writemask = sub_inst->OutputWriteMask; - } else { - writemask = 0; - file = RC_FILE_NONE; - } - new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, - &reader_data); - get_variable_helper(variable_list, new_var); + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, &reader_data); + get_variable_helper(variable_list, new_var); } /** * Compare function for sorting variable pointers by the lowest instruction * IP from it and its friends. 
*/ -static int cmpfunc_variable_by_ip (const void * a, const void * b) { - struct rc_variable * var_a = *(struct rc_variable **)a; - struct rc_variable * var_b = *(struct rc_variable **)b; - unsigned int min_ip_a = var_a->Inst->IP; - unsigned int min_ip_b = var_b->Inst->IP; +static int +cmpfunc_variable_by_ip(const void *a, const void *b) +{ + struct rc_variable *var_a = *(struct rc_variable **)a; + struct rc_variable *var_b = *(struct rc_variable **)b; + unsigned int min_ip_a = var_a->Inst->IP; + unsigned int min_ip_b = var_b->Inst->IP; - /* Find the minimal IP of a variable and its friends */ - while (var_a->Friend) { - var_a = var_a->Friend; - if (var_a->Inst->IP < min_ip_a) - min_ip_a = var_a->Inst->IP; - } - while (var_b->Friend) { - var_b = var_b->Friend; - if (var_b->Inst->IP < min_ip_b) - min_ip_b = var_b->Inst->IP; - } + /* Find the minimal IP of a variable and its friends */ + while (var_a->Friend) { + var_a = var_a->Friend; + if (var_a->Inst->IP < min_ip_a) + min_ip_a = var_a->Inst->IP; + } + while (var_b->Friend) { + var_b = var_b->Friend; + if (var_b->Inst->IP < min_ip_b) + min_ip_b = var_b->Inst->IP; + } - return (int)min_ip_a - (int)min_ip_b; + return (int)min_ip_a - (int)min_ip_b; } /** @@ -336,117 +305,110 @@ static int cmpfunc_variable_by_ip (const void * a, const void * b) { * definition-use chain. Any two variables that share a reader are considered * "friends" and they are linked together via the Friend attribute. */ -struct rc_list * rc_get_variables(struct radeon_compiler * c) +struct rc_list * +rc_get_variables(struct radeon_compiler *c) { - struct rc_instruction * inst; - struct rc_list * variable_list = NULL; + struct rc_instruction *inst; + struct rc_list *variable_list = NULL; - /* We search for the variables in two loops in order to get it right in - * the following specific case - * - * IF aluresult.x___; - * ... - * MAD temp[0].xyz, src0.000, src0.111, src0.000 - * MAD temp[0].w, src0.0, src0.1, src0.0 - * ELSE; - * ... 
- * TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; - * ENDIF; - * src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT - * MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 - * MAD temp[1].w, src0.w, src1.w, src0.0 - * - * If we go just in one loop, we will first create two variables for the - * temp[0].xyz and temp[0].w. This happens because they don't share a reader - * as the src1.xyz and src1.w of the instruction where the value is used are - * in theory independent. They are not because the same register is written - * also by the texture instruction in the other branch and TEX can't write xyz - * and w separately. - * - * Therefore first search for RC_INSTRUCTION_NORMAL to create variables from - * the texture instruction and than the pair instructions will be properly - * marked as friends. So we will end with only one variable here as we should. - * - * This doesn't matter before the pair translation, because everything is - * RC_INSTRUCTION_NORMAL. - */ - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - struct rc_reader_data reader_data; - struct rc_variable * new_var; - memset(&reader_data, 0, sizeof(reader_data)); - rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); - if (reader_data.ReaderCount == 0) { - /* Variable is only returned if there is both writer - * and reader. This means dead writes will not get - * register allocated as a result and can overwrite random - * registers. Assert on dead writes instead so we can improve - * the DCE. 
- */ - const struct rc_opcode_info *opcode = - rc_get_opcode_info(inst->U.I.Opcode); - assert(c->type == RC_FRAGMENT_PROGRAM || - !opcode->HasDstReg || - inst->U.I.DstReg.File == RC_FILE_OUTPUT || - inst->U.I.DstReg.File == RC_FILE_ADDRESS); - continue; - } - new_var = rc_variable(c, inst->U.I.DstReg.File, - inst->U.I.DstReg.Index, - inst->U.I.DstReg.WriteMask, &reader_data); - get_variable_helper(&variable_list, new_var); - } - } + /* We search for the variables in two loops in order to get it right in + * the following specific case + * + * IF aluresult.x___; + * ... + * MAD temp[0].xyz, src0.000, src0.111, src0.000 + * MAD temp[0].w, src0.0, src0.1, src0.0 + * ELSE; + * ... + * TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; + * ENDIF; + * src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT + * MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 + * MAD temp[1].w, src0.w, src1.w, src0.0 + * + * If we go just in one loop, we will first create two variables for the + * temp[0].xyz and temp[0].w. This happens because they don't share a reader + * as the src1.xyz and src1.w of the instruction where the value is used are + * in theory independent. They are not because the same register is written + * also by the texture instruction in the other branch and TEX can't write xyz + * and w separately. + * + * Therefore first search for RC_INSTRUCTION_NORMAL to create variables from + * the texture instruction and than the pair instructions will be properly + * marked as friends. So we will end with only one variable here as we should. + * + * This doesn't matter before the pair translation, because everything is + * RC_INSTRUCTION_NORMAL. 
+ */ + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct rc_reader_data reader_data; + struct rc_variable *new_var; + memset(&reader_data, 0, sizeof(reader_data)); + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + /* Variable is only returned if there is both writer + * and reader. This means dead writes will not get + * register allocated as a result and can overwrite random + * registers. Assert on dead writes instead so we can improve + * the DCE. + */ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + assert(c->type == RC_FRAGMENT_PROGRAM || !opcode->HasDstReg || + inst->U.I.DstReg.File == RC_FILE_OUTPUT || + inst->U.I.DstReg.File == RC_FILE_ADDRESS); + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&variable_list, new_var); + } + } - bool needs_sorting = false; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - if (inst->Type != RC_INSTRUCTION_NORMAL) { - needs_sorting = true; - get_variable_pair_helper(&variable_list, c, inst, - &inst->U.P.RGB); - get_variable_pair_helper(&variable_list, c, inst, - &inst->U.P.Alpha); - } - } + bool needs_sorting = false; + for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + if (inst->Type != RC_INSTRUCTION_NORMAL) { + needs_sorting = true; + get_variable_pair_helper(&variable_list, c, inst, &inst->U.P.RGB); + get_variable_pair_helper(&variable_list, c, inst, &inst->U.P.Alpha); + } + } - if (variable_list && needs_sorting) { - unsigned int count = rc_list_count(variable_list); - struct rc_variable **variables = memory_pool_malloc(&c->Pool, - sizeof(struct rc_variable *) * count); + if (variable_list && needs_sorting) { + unsigned int count = 
rc_list_count(variable_list); + struct rc_variable **variables = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable *) * count); - struct rc_list * current = variable_list; - for(unsigned int i = 0; current; i++, current = current->Next) { - struct rc_variable * var = current->Item; - variables[i] = var; - } + struct rc_list *current = variable_list; + for (unsigned int i = 0; current; i++, current = current->Next) { + struct rc_variable *var = current->Item; + variables[i] = var; + } - qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip); + qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip); - current = variable_list; - for(unsigned int i = 0; current; i++, current = current->Next) { - current->Item = variables[i]; - } - } + current = variable_list; + for (unsigned int i = 0; current; i++, current = current->Next) { + current->Item = variables[i]; + } + } - return variable_list; + return variable_list; } /** * @return The bitwise or of the writemasks of a variable and all of its * friends. */ -unsigned int rc_variable_writemask_sum(struct rc_variable * var) +unsigned int +rc_variable_writemask_sum(struct rc_variable *var) { - unsigned int writemask = 0; - while(var) { - writemask |= var->Dst.WriteMask; - var = var->Friend; - } - return writemask; + unsigned int writemask = 0; + while (var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; } /* @@ -454,135 +416,121 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var) * that read from two different variable friends are only included once in * this list. 
*/ -struct rc_list * rc_variable_readers_union(struct rc_variable * var) +struct rc_list * +rc_variable_readers_union(struct rc_variable *var) { - struct rc_list * list = NULL; - while (var) { - unsigned int i; - for (i = 0; i < var->ReaderCount; i++) { - struct rc_list * temp; - struct rc_reader * a = &var->Readers[i]; - unsigned int match = 0; - for (temp = list; temp; temp = temp->Next) { - struct rc_reader * b = temp->Item; - if (a->Inst->Type != b->Inst->Type) { - continue; - } - if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { - if (a->U.I.Src == b->U.I.Src) { - match = 1; - break; - } - } - if (a->Inst->Type == RC_INSTRUCTION_PAIR) { - if (a->U.P.Arg == b->U.P.Arg - && a->U.P.Src == b->U.P.Src) { - match = 1; - break; - } - } - } - if (match) { - continue; - } - rc_list_add(&list, rc_list(&var->C->Pool, a)); - } - var = var->Friend; - } - return list; + struct rc_list *list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list *temp; + struct rc_reader *a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader *b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; } -static unsigned int reader_equals_src( - struct rc_reader reader, - unsigned int src_type, - void * src) +static unsigned int +reader_equals_src(struct rc_reader reader, unsigned int src_type, void *src) { - if (reader.Inst->Type != src_type) { - return 0; - } - if (src_type == RC_INSTRUCTION_NORMAL) { - return reader.U.I.Src == src; - } else { - return reader.U.P.Src == src; - } + if (reader.Inst->Type != 
src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } } -static unsigned int variable_writes_src( - struct rc_variable * var, - unsigned int src_type, - void * src) +static unsigned int +variable_writes_src(struct rc_variable *var, unsigned int src_type, void *src) { - unsigned int i; - for (i = 0; i < var->ReaderCount; i++) { - if (reader_equals_src(var->Readers[i], src_type, src)) { - return 1; - } - } - return 0; + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; } - -struct rc_list * rc_variable_list_get_writers( - struct rc_list * var_list, - unsigned int src_type, - void * src) +struct rc_list * +rc_variable_list_get_writers(struct rc_list *var_list, unsigned int src_type, void *src) { - struct rc_list * list_ptr; - struct rc_list * writer_list = NULL; - for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { - struct rc_variable * var = list_ptr->Item; - if (variable_writes_src(var, src_type, src)) { - struct rc_variable * friend; - rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); - for (friend = var->Friend; friend; - friend = friend->Friend) { - if (variable_writes_src(friend, src_type, src)) { - rc_list_add(&writer_list, - rc_list(&var->C->Pool, friend)); - } - } - /* Once we have identified the variable and its - * friends that write this source, we can stop - * stop searching, because we know none of the - * other variables in the list will write this source. - * If they did they would be friends of var. 
- */ - break; - } - } - return writer_list; + struct rc_list *list_ptr; + struct rc_list *writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable *var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable *friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, rc_list(&var->C->Pool, friend)); + } + } + /* Once we have identified the variable and its + * friends that write this source, we can stop + * stop searching, because we know none of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; } -struct rc_list * rc_variable_list_get_writers_one_reader( - struct rc_list * var_list, - unsigned int src_type, - void * src) +struct rc_list * +rc_variable_list_get_writers_one_reader(struct rc_list *var_list, unsigned int src_type, void *src) { - struct rc_list * writer_list = - rc_variable_list_get_writers(var_list, src_type, src); - struct rc_list * reader_list = - rc_variable_readers_union(writer_list->Item); - if (rc_list_count(reader_list) > 1) { - return NULL; - } else { - return writer_list; - } + struct rc_list *writer_list = rc_variable_list_get_writers(var_list, src_type, src); + struct rc_list *reader_list = rc_variable_readers_union(writer_list->Item); + if (rc_list_count(reader_list) > 1) { + return NULL; + } else { + return writer_list; + } } -void rc_variable_print(struct rc_variable * var) +void +rc_variable_print(struct rc_variable *var) { - unsigned int i; - while (var) { - fprintf(stderr, "%u: TEMP[%u].%u: ", - var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); - for (i = 0; i < 4; i++) { - fprintf(stderr, "chan %u: start=%u end=%u ", i, - var->Live[i].Start, var->Live[i].End); - } - fprintf(stderr, "%u readers\n", 
var->ReaderCount); - if (var->Friend) { - fprintf(stderr, "Friend: \n\t"); - } - var = var->Friend; - } + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } } diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h index ebd4f256dca..7e3426e96c0 100644 --- a/src/gallium/drivers/r300/compiler/radeon_variable.h +++ b/src/gallium/drivers/r300/compiler/radeon_variable.h @@ -14,59 +14,48 @@ struct rc_reader_data; struct rc_readers; struct live_intervals { - int Start; - int End; - int Used; + int Start; + int End; + int Used; }; struct rc_variable { - struct radeon_compiler * C; - struct rc_dst_register Dst; + struct radeon_compiler *C; + struct rc_dst_register Dst; - struct rc_instruction * Inst; - unsigned int ReaderCount; - struct rc_reader * Readers; - struct live_intervals Live[4]; + struct rc_instruction *Inst; + unsigned int ReaderCount; + struct rc_reader *Readers; + struct live_intervals Live[4]; - /* A friend is a variable that shares a reader with another variable. - */ - struct rc_variable * Friend; + /* A friend is a variable that shares a reader with another variable. 
+ */ + struct rc_variable *Friend; }; -void rc_variable_change_dst( - struct rc_variable * var, - unsigned int new_index, - unsigned int new_writemask); +void rc_variable_change_dst(struct rc_variable *var, unsigned int new_index, + unsigned int new_writemask); -void rc_variable_compute_live_intervals(struct rc_variable * var); +void rc_variable_compute_live_intervals(struct rc_variable *var); -void rc_variable_add_friend( - struct rc_variable * var, - struct rc_variable * friend); +void rc_variable_add_friend(struct rc_variable *var, struct rc_variable *friend); -struct rc_variable * rc_variable( - struct radeon_compiler * c, - unsigned int DstFile, - unsigned int DstIndex, - unsigned int DstWriteMask, - struct rc_reader_data * reader_data); +struct rc_variable *rc_variable(struct radeon_compiler *c, unsigned int DstFile, + unsigned int DstIndex, unsigned int DstWriteMask, + struct rc_reader_data *reader_data); -struct rc_list * rc_get_variables(struct radeon_compiler * c); +struct rc_list *rc_get_variables(struct radeon_compiler *c); -unsigned int rc_variable_writemask_sum(struct rc_variable * var); +unsigned int rc_variable_writemask_sum(struct rc_variable *var); -struct rc_list * rc_variable_readers_union(struct rc_variable * var); +struct rc_list *rc_variable_readers_union(struct rc_variable *var); -struct rc_list * rc_variable_list_get_writers( - struct rc_list * var_list, - unsigned int src_type, - void * src); +struct rc_list *rc_variable_list_get_writers(struct rc_list *var_list, unsigned int src_type, + void *src); -struct rc_list * rc_variable_list_get_writers_one_reader( - struct rc_list * var_list, - unsigned int src_type, - void * src); +struct rc_list *rc_variable_list_get_writers_one_reader(struct rc_list *var_list, + unsigned int src_type, void *src); -void rc_variable_print(struct rc_variable * var); +void rc_variable_print(struct rc_variable *var); #endif /* RADEON_VARIABLE_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c 
b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c index b71e210f0d2..c5bbadc63bd 100644 --- a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c +++ b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c @@ -11,259 +11,246 @@ #include "radeon_program_constants.h" struct vert_fc_state { - struct radeon_compiler *C; - unsigned BranchDepth; - unsigned LoopDepth; - unsigned LoopsReserved; - int PredStack[R500_PVS_MAX_LOOP_DEPTH]; - int PredicateReg; + struct radeon_compiler *C; + unsigned BranchDepth; + unsigned LoopDepth; + unsigned LoopsReserved; + int PredStack[R500_PVS_MAX_LOOP_DEPTH]; + int PredicateReg; }; -static void build_pred_src( - struct rc_src_register * src, - struct vert_fc_state * fc_state) +static void +build_pred_src(struct rc_src_register *src, struct vert_fc_state *fc_state) { - src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_W); - src->File = RC_FILE_TEMPORARY; - src->Index = fc_state->PredicateReg; + src->Swizzle = + RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_W); + src->File = RC_FILE_TEMPORARY; + src->Index = fc_state->PredicateReg; } -static void build_pred_dst( - struct rc_dst_register * dst, - struct vert_fc_state * fc_state) +static void +build_pred_dst(struct rc_dst_register *dst, struct vert_fc_state *fc_state) { - dst->WriteMask = RC_MASK_W; - dst->File = RC_FILE_TEMPORARY; - dst->Index = fc_state->PredicateReg; + dst->WriteMask = RC_MASK_W; + dst->File = RC_FILE_TEMPORARY; + dst->Index = fc_state->PredicateReg; } -static void mark_write(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void +mark_write(void *userdata, struct rc_instruction *inst, rc_register_file file, unsigned int index, + unsigned int mask) { - unsigned int * writemasks = userdata; + unsigned int *writemasks = userdata; - if (file != RC_FILE_TEMPORARY) - return; + if (file != RC_FILE_TEMPORARY) + return; - if 
(index >= R300_VS_MAX_TEMPS) - return; + if (index >= R300_VS_MAX_TEMPS) + return; - writemasks[index] |= mask; + writemasks[index] |= mask; } -static int reserve_predicate_reg(struct vert_fc_state * fc_state) +static int +reserve_predicate_reg(struct vert_fc_state *fc_state) { - int i; - unsigned int writemasks[RC_REGISTER_MAX_INDEX]; - struct rc_instruction * inst; - memset(writemasks, 0, sizeof(writemasks)); - for(inst = fc_state->C->Program.Instructions.Next; - inst != &fc_state->C->Program.Instructions; - inst = inst->Next) { - rc_for_all_writes_mask(inst, mark_write, writemasks); - } + int i; + unsigned int writemasks[RC_REGISTER_MAX_INDEX]; + struct rc_instruction *inst; + memset(writemasks, 0, sizeof(writemasks)); + for (inst = fc_state->C->Program.Instructions.Next; inst != &fc_state->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } - for(i = 0; i < fc_state->C->max_temp_regs; i++) { - /* Most of the control flow instructions only write the - * W component of the Predicate Register, but - * the docs say that ME_PRED_SET_CLR and - * ME_PRED_SET_RESTORE write all components of the - * register, so we must reserve a register that has - * all its components free. */ - if (!writemasks[i]) { - fc_state->PredicateReg = i; - break; - } - } - if (i == fc_state->C->max_temp_regs) { - rc_error(fc_state->C, "No free temporary to use for" - " predicate stack counter.\n"); - return -1; - } - return 1; + for (i = 0; i < fc_state->C->max_temp_regs; i++) { + /* Most of the control flow instructions only write the + * W component of the Predicate Register, but + * the docs say that ME_PRED_SET_CLR and + * ME_PRED_SET_RESTORE write all components of the + * register, so we must reserve a register that has + * all its components free. 
*/ + if (!writemasks[i]) { + fc_state->PredicateReg = i; + break; + } + } + if (i == fc_state->C->max_temp_regs) { + rc_error(fc_state->C, "No free temporary to use for" + " predicate stack counter.\n"); + return -1; + } + return 1; } -static void lower_bgnloop( - struct rc_instruction * inst, - struct vert_fc_state * fc_state) +static void +lower_bgnloop(struct rc_instruction *inst, struct vert_fc_state *fc_state) { - struct rc_instruction * new_inst = - rc_insert_new_instruction(fc_state->C, inst->Prev); + struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev); - if ((!fc_state->C->is_r500 - && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) - || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) { - rc_error(fc_state->C, "Loops are nested too deep."); - return; - } + if ((!fc_state->C->is_r500 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) || + fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) { + rc_error(fc_state->C, "Loops are nested too deep."); + return; + } - if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) { - if (fc_state->PredicateReg == -1) { - if (reserve_predicate_reg(fc_state) == -1) { - return; - } - } + if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) { + if (fc_state->PredicateReg == -1) { + if (reserve_predicate_reg(fc_state) == -1) { + return; + } + } - /* Initialize the predicate bit to true. */ - new_inst->U.I.Opcode = RC_ME_PRED_SEQ; - build_pred_dst(&new_inst->U.I.DstReg, fc_state); - new_inst->U.I.SrcReg[0].Index = 0; - new_inst->U.I.SrcReg[0].File = RC_FILE_NONE; - new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; - } else { - fc_state->PredStack[fc_state->LoopDepth] = - fc_state->PredicateReg; - /* Copy the current predicate value to this loop's - * predicate register */ + /* Initialize the predicate bit to true. 
*/ + new_inst->U.I.Opcode = RC_ME_PRED_SEQ; + build_pred_dst(&new_inst->U.I.DstReg, fc_state); + new_inst->U.I.SrcReg[0].Index = 0; + new_inst->U.I.SrcReg[0].File = RC_FILE_NONE; + new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + } else { + fc_state->PredStack[fc_state->LoopDepth] = fc_state->PredicateReg; + /* Copy the current predicate value to this loop's + * predicate register */ - /* Use the old predicate value for src0 */ - build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); + /* Use the old predicate value for src0 */ + build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); - /* Reserve this loop's predicate register */ - if (reserve_predicate_reg(fc_state) == -1) { - return; - } - - /* Copy the old predicate value to the new register */ - new_inst->U.I.Opcode = RC_OPCODE_ADD; - build_pred_dst(&new_inst->U.I.DstReg, fc_state); - new_inst->U.I.SrcReg[1].Index = 0; - new_inst->U.I.SrcReg[1].File = RC_FILE_NONE; - new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; - } + /* Reserve this loop's predicate register */ + if (reserve_predicate_reg(fc_state) == -1) { + return; + } + /* Copy the old predicate value to the new register */ + new_inst->U.I.Opcode = RC_OPCODE_ADD; + build_pred_dst(&new_inst->U.I.DstReg, fc_state); + new_inst->U.I.SrcReg[1].Index = 0; + new_inst->U.I.SrcReg[1].File = RC_FILE_NONE; + new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; + } } -static void lower_brk( - struct rc_instruction * inst, - struct vert_fc_state * fc_state) +static void +lower_brk(struct rc_instruction *inst, struct vert_fc_state *fc_state) { - if (fc_state->LoopDepth == 1) { - inst->U.I.Opcode = RC_OPCODE_RCP; - inst->U.I.DstReg.Pred = RC_PRED_SET; - inst->U.I.SrcReg[0].Index = 0; - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; - } else { - inst->U.I.Opcode = RC_ME_PRED_SET_CLR; - inst->U.I.DstReg.Pred = RC_PRED_SET; - } + if (fc_state->LoopDepth == 1) { + inst->U.I.Opcode = RC_OPCODE_RCP; + inst->U.I.DstReg.Pred = 
RC_PRED_SET; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + } else { + inst->U.I.Opcode = RC_ME_PRED_SET_CLR; + inst->U.I.DstReg.Pred = RC_PRED_SET; + } - build_pred_dst(&inst->U.I.DstReg, fc_state); + build_pred_dst(&inst->U.I.DstReg, fc_state); } -static void lower_endloop( - struct rc_instruction * inst, - struct vert_fc_state * fc_state) +static void +lower_endloop(struct rc_instruction *inst, struct vert_fc_state *fc_state) { - struct rc_instruction * new_inst = - rc_insert_new_instruction(fc_state->C, inst); + struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst); - new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE; - build_pred_dst(&new_inst->U.I.DstReg, fc_state); - /* Restore the previous predicate register. */ - fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1]; - build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); + new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE; + build_pred_dst(&new_inst->U.I.DstReg, fc_state); + /* Restore the previous predicate register. */ + fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1]; + build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); } -static void lower_if( - struct rc_instruction * inst, - struct vert_fc_state * fc_state) +static void +lower_if(struct rc_instruction *inst, struct vert_fc_state *fc_state) { - /* Reserve a temporary to use as our predicate stack counter, if we - * don't already have one. */ - if (fc_state->PredicateReg == -1) { - /* If we are inside a loop, the Predicate Register should - * have already been defined. */ - assert(fc_state->LoopDepth == 0); + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (fc_state->PredicateReg == -1) { + /* If we are inside a loop, the Predicate Register should + * have already been defined. 
*/ + assert(fc_state->LoopDepth == 0); - if (reserve_predicate_reg(fc_state) == -1) { - return; - } - } + if (reserve_predicate_reg(fc_state) == -1) { + return; + } + } - if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) { - inst->U.I.Opcode = RC_ME_PRED_SNEQ; - } else { - unsigned swz; - inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH; - memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], - sizeof(inst->U.I.SrcReg[1])); - swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle); - /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the - * w component */ - inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz); - build_pred_src(&inst->U.I.SrcReg[0], fc_state); - } - build_pred_dst(&inst->U.I.DstReg, fc_state); + if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) { + inst->U.I.Opcode = RC_ME_PRED_SNEQ; + } else { + unsigned swz; + inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH; + memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], sizeof(inst->U.I.SrcReg[1])); + swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle); + /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the + * w component */ + inst->U.I.SrcReg[1].Swizzle = + RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz); + build_pred_src(&inst->U.I.SrcReg[0], fc_state); + } + build_pred_dst(&inst->U.I.DstReg, fc_state); } -void rc_vert_fc(struct radeon_compiler *c, void *user) +void +rc_vert_fc(struct radeon_compiler *c, void *user) { - struct rc_instruction * inst; - struct vert_fc_state fc_state; + struct rc_instruction *inst; + struct vert_fc_state fc_state; - memset(&fc_state, 0, sizeof(fc_state)); - fc_state.PredicateReg = -1; - fc_state.C = c; + memset(&fc_state, 0, sizeof(fc_state)); + fc_state.PredicateReg = -1; + fc_state.C = c; - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { + for (inst = c->Program.Instructions.Next; inst != 
&c->Program.Instructions; inst = inst->Next) { - switch (inst->U.I.Opcode) { + switch (inst->U.I.Opcode) { - case RC_OPCODE_BGNLOOP: - lower_bgnloop(inst, &fc_state); - fc_state.LoopDepth++; - break; + case RC_OPCODE_BGNLOOP: + lower_bgnloop(inst, &fc_state); + fc_state.LoopDepth++; + break; - case RC_OPCODE_BRK: - lower_brk(inst, &fc_state); - break; + case RC_OPCODE_BRK: + lower_brk(inst, &fc_state); + break; - case RC_OPCODE_ENDLOOP: - if (fc_state.BranchDepth != 0 - || fc_state.LoopDepth != 1) { - lower_endloop(inst, &fc_state); - /* Skip the new PRED_RESTORE */ - inst = inst->Next; - } - fc_state.LoopDepth--; - break; - case RC_OPCODE_IF: - lower_if(inst, &fc_state); - fc_state.BranchDepth++; - break; + case RC_OPCODE_ENDLOOP: + if (fc_state.BranchDepth != 0 || fc_state.LoopDepth != 1) { + lower_endloop(inst, &fc_state); + /* Skip the new PRED_RESTORE */ + inst = inst->Next; + } + fc_state.LoopDepth--; + break; + case RC_OPCODE_IF: + lower_if(inst, &fc_state); + fc_state.BranchDepth++; + break; - case RC_OPCODE_ELSE: - inst->U.I.Opcode = RC_ME_PRED_SET_INV; - build_pred_dst(&inst->U.I.DstReg, &fc_state); - build_pred_src(&inst->U.I.SrcReg[0], &fc_state); - break; + case RC_OPCODE_ELSE: + inst->U.I.Opcode = RC_ME_PRED_SET_INV; + build_pred_dst(&inst->U.I.DstReg, &fc_state); + build_pred_src(&inst->U.I.SrcReg[0], &fc_state); + break; - case RC_OPCODE_ENDIF: - /* TODO: If LoopDepth == 1 and there is only a single break - * we can optimize out the endif just after the break. However - * previous attempts were buggy, so keep it simple for now. - */ - inst->U.I.Opcode = RC_ME_PRED_SET_POP; - build_pred_dst(&inst->U.I.DstReg, &fc_state); - build_pred_src(&inst->U.I.SrcReg[0], &fc_state); - fc_state.BranchDepth--; - break; + case RC_OPCODE_ENDIF: + /* TODO: If LoopDepth == 1 and there is only a single break + * we can optimize out the endif just after the break. However + * previous attempts were buggy, so keep it simple for now. 
+ */ + inst->U.I.Opcode = RC_ME_PRED_SET_POP; + build_pred_dst(&inst->U.I.DstReg, &fc_state); + build_pred_src(&inst->U.I.SrcReg[0], &fc_state); + fc_state.BranchDepth--; + break; - default: - if (fc_state.BranchDepth || fc_state.LoopDepth) { - inst->U.I.DstReg.Pred = RC_PRED_SET; - } - break; - } + default: + if (fc_state.BranchDepth || fc_state.LoopDepth) { + inst->U.I.DstReg.Pred = RC_PRED_SET; + } + break; + } - if (c->Error) { - return; - } - } + if (c->Error) { + return; + } + } }