radv: Format.

Using

find ./src/amd/vulkan -regex '.*/.*\.\(c\|h\|cpp\)' | xargs -P8 -n1 clang-format --style=file -i

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10091>
This commit is contained in:
Bas Nieuwenhuizen
2021-04-10 03:24:05 +02:00
parent 8451b41022
commit 59c501ca35
65 changed files with 47908 additions and 51861 deletions
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+30 -31
View File
@@ -30,30 +30,30 @@
/* Compile-time constants: a vendor ID followed by fixed limits (MAX_*) and
 * counts (NUM_*).
 */
#define ATI_VENDOR_ID 0x1002
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_SAMPLE_LOCATIONS 32
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
/* Sum of the uniform and storage dynamic buffer limits (16 + 8 = 24). */
#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
#define MAX_SAMPLES_LOG2 4
#define NUM_META_FS_KEYS 12
#define RADV_MAX_DRM_DEVICES 8
#define MAX_VIEWS 8
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_SO_OUTPUTS 64
/* 4 * 1024 * 1024 = 4194304; the ull suffix makes this an unsigned long long. */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_SAMPLE_LOCATIONS 32
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
#define MAX_SAMPLES_LOG2 4
#define NUM_META_FS_KEYS 12
#define RADV_MAX_DRM_DEVICES 8
#define MAX_VIEWS 8
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_SO_OUTPUTS 64
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64
#define MAX_BIND_POINTS 2 /* compute + graphics */
#define MAX_BIND_POINTS 2 /* compute + graphics */
#define NUM_DEPTH_CLEAR_PIPELINES 3
#define NUM_DEPTH_CLEAR_PIPELINES 3
#define NUM_DEPTH_DECOMPRESS_PIPELINES 3
/*
@@ -65,13 +65,13 @@
#define RADV_BUFFER_UPDATE_THRESHOLD 1024
/* descriptor index into scratch ring offsets */
#define RING_SCRATCH 0
#define RING_ESGS_VS 1
#define RING_ESGS_GS 2
#define RING_GSVS_VS 3
#define RING_GSVS_GS 4
#define RING_HS_TESS_FACTOR 5
#define RING_HS_TESS_OFFCHIP 6
#define RING_SCRATCH 0
#define RING_ESGS_VS 1
#define RING_ESGS_GS 2
#define RING_GSVS_VS 3
#define RING_GSVS_GS 4
#define RING_HS_TESS_FACTOR 5
#define RING_HS_TESS_OFFCHIP 6
#define RING_PS_SAMPLE_POSITIONS 7
/* max number of descriptor sets */
@@ -80,7 +80,7 @@
/* Make sure everything is addressable by a signed 32-bit int, and
* our largest descriptors are 96 bytes.
*/
#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31 ) / 96)
#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31) / 96)
/* Our buffer size fields allow only 2**32 - 1. We round that down to a multiple
* of 4 bytes so we can align buffer sizes up.
@@ -91,4 +91,3 @@
#define RADV_SUBGROUP_SIZE 64
#endif /* RADV_CONSTANTS_H */
+108 -108
View File
@@ -25,166 +25,166 @@
#ifndef RADV_CS_H
#define RADV_CS_H
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "radv_private.h"
#include "sid.h"
/* Make sure the command stream can take `needed` more entries.
 *
 * When the free room (cs->max_dw - cs->cdw) is smaller than `needed`, the
 * winsys is asked to grow the stream through ws->cs_grow(). The return value
 * is cs->cdw + needed, i.e. the value cs->cdw would have once `needed`
 * entries have been emitted. cs->cdw is read after the optional grow.
 */
static inline unsigned radeon_check_space(struct radeon_winsys *ws,
struct radeon_cmdbuf *cs,
unsigned needed)
static inline unsigned
radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
if (cs->max_dw - cs->cdw < needed)
ws->cs_grow(cs, needed);
return cs->cdw + needed;
if (cs->max_dw - cs->cdw < needed)
ws->cs_grow(cs, needed);
return cs->cdw + needed;
}
/* Emit the two-word header of a PKT3_SET_CONFIG_REG packet for `num` values
 * starting at register `reg`.
 *
 * `reg` must lie in [SI_CONFIG_REG_OFFSET, SI_CONFIG_REG_END) and `num` must
 * be non-zero. Only the PKT3 word and the register word
 * ((reg - SI_CONFIG_REG_OFFSET) >> 2) are written here. The space assert
 * covers 2 + num entries, so the caller is expected to emit the `num` values
 * itself right after this call.
 */
static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
static inline void
radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
/* Write a single config register: emits the SET_CONFIG_REG header for one
 * value via radeon_set_config_reg_seq(), followed by `value`.
 */
static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
static inline void
radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
radeon_set_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
radeon_set_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
/* Emit the two-word header of a PKT3_SET_CONTEXT_REG packet for `num` values
 * starting at register `reg`.
 *
 * `reg` must lie in [SI_CONTEXT_REG_OFFSET, SI_CONTEXT_REG_END) and `num`
 * must be non-zero. Only the PKT3 word and the register word
 * ((reg - SI_CONTEXT_REG_OFFSET) >> 2) are written here. The space assert
 * covers 2 + num entries, so the caller emits the `num` values afterwards.
 */
static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
static inline void
radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
/* Write a single context register: emits the SET_CONTEXT_REG header for one
 * value via radeon_set_context_reg_seq(), followed by `value`.
 */
static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
static inline void
radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
radeon_set_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
radeon_set_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
/* Write a single context register with an extra index field.
 *
 * Emits three entries: a PKT3_SET_CONTEXT_REG header for one value, the
 * register word with `idx` OR-ed in at bit 28 and up, and `value`.
 * `reg` must lie in [SI_CONTEXT_REG_OFFSET, SI_CONTEXT_REG_END). Unlike the
 * sh/uconfig *_idx variants, `idx` is not asserted to be non-zero here.
 */
static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs,
unsigned reg, unsigned idx,
unsigned value)
static inline void
radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
/* Emit a PKT3_CONTEXT_REG_RMW packet for context register `reg`.
 *
 * Four entries are written: the PKT3 header (count 2), the register word
 * ((reg - SI_CONTEXT_REG_OFFSET) >> 2), then `mask`, then `value`. Note the
 * emit order (mask before value) is the reverse of the parameter order.
 * `reg` must lie in [SI_CONTEXT_REG_OFFSET, SI_CONTEXT_REG_END).
 */
static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs,
unsigned reg, unsigned value,
unsigned mask)
static inline void
radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg, unsigned value, unsigned mask)
{
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 4 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(cs, mask);
radeon_emit(cs, value);
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 4 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
radeon_emit(cs, mask);
radeon_emit(cs, value);
}
/* Emit the two-word header of a PKT3_SET_SH_REG packet for `num` values
 * starting at register `reg`.
 *
 * `reg` must lie in [SI_SH_REG_OFFSET, SI_SH_REG_END) and `num` must be
 * non-zero. Only the PKT3 word and the register word
 * ((reg - SI_SH_REG_OFFSET) >> 2) are written here. The space assert covers
 * 2 + num entries, so the caller emits the `num` values afterwards.
 */
static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
static inline void
radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
/* Write a single SH register: emits the SET_SH_REG header for one value via
 * radeon_set_sh_reg_seq(), followed by `value`.
 */
static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
static inline void
radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
radeon_set_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
radeon_set_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
/* Write a single SH register with an index field.
 *
 * Emits three entries: the packet header for one value, the register word
 * with `idx` OR-ed in at bit 28 and up, and `value`. `reg` must lie in
 * [SI_SH_REG_OFFSET, SI_SH_REG_END) and `idx` must be non-zero.
 *
 * The opcode is PKT3_SET_SH_REG_INDEX, except on chips older than GFX10
 * (pdevice->rad_info.chip_class < GFX10) where plain PKT3_SET_SH_REG is used.
 * The register word is built the same way in both cases.
 */
static inline void radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice,
struct radeon_cmdbuf *cs,
unsigned reg, unsigned idx,
unsigned value)
static inline void
radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(idx);
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(idx);
unsigned opcode = PKT3_SET_SH_REG_INDEX;
if (pdevice->rad_info.chip_class < GFX10)
opcode = PKT3_SET_SH_REG;
unsigned opcode = PKT3_SET_SH_REG_INDEX;
if (pdevice->rad_info.chip_class < GFX10)
opcode = PKT3_SET_SH_REG;
radeon_emit(cs, PKT3(opcode, 1, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
radeon_emit(cs, PKT3(opcode, 1, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
/* Emit the two-word header of a PKT3_SET_UCONFIG_REG packet for `num` values
 * starting at register `reg`.
 *
 * `reg` must lie in [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END) and `num`
 * must be non-zero. Only the PKT3 word and the register word
 * ((reg - CIK_UCONFIG_REG_OFFSET) >> 2) are written here. The space assert
 * covers 2 + num entries, so the caller emits the `num` values afterwards.
 */
static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
/* Same as radeon_set_uconfig_reg_seq(), except that the third PKT3()
 * argument is 1 instead of 0. The meaning of that argument is defined by the
 * PKT3 macro, which is not visible here.
 */
static inline void radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs,
unsigned reg, unsigned num)
static inline void
radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
/* Write a single uconfig register: emits the SET_UCONFIG_REG header for one
 * value via radeon_set_uconfig_reg_seq(), followed by `value`.
 */
static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
static inline void
radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
radeon_set_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
radeon_set_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
/* Write a single uconfig register with an index field.
 *
 * Emits three entries: the packet header for one value, the register word
 * with `idx` OR-ed in at bit 28 and up, and `value`. `reg` must lie in
 * [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END) and `idx` must be non-zero.
 *
 * The opcode is PKT3_SET_UCONFIG_REG_INDEX, except when chip_class is below
 * GFX9, or is GFX9 with me_fw_version < 26; in those cases plain
 * PKT3_SET_UCONFIG_REG is used. The register word is built the same way in
 * both cases.
 */
static inline void radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice,
struct radeon_cmdbuf *cs,
unsigned reg, unsigned idx,
unsigned value)
static inline void
radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(idx);
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(idx);
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
if (pdevice->rad_info.chip_class < GFX9 ||
(pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
opcode = PKT3_SET_UCONFIG_REG;
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
if (pdevice->rad_info.chip_class < GFX9 ||
(pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
opcode = PKT3_SET_UCONFIG_REG;
radeon_emit(cs, PKT3(opcode, 1, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
radeon_emit(cs, PKT3(opcode, 1, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
/* Write `value` to a register below CIK_UCONFIG_REG_OFFSET using a
 * PKT3_COPY_DATA packet instead of a SET_*_REG packet.
 *
 * Six entries are emitted: the COPY_DATA header (count 4), the control word
 * selecting COPY_DATA_IMM as source and COPY_DATA_PERF as destination, the
 * immediate `value`, an unused 0, `reg >> 2`, and another unused 0.
 * Unlike the other setters, `reg` is not rebased against a range offset.
 */
static inline void radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs,
unsigned reg,
unsigned value)
static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
assert(reg < CIK_UCONFIG_REG_OFFSET);
assert(cs->cdw + 6 <= cs->max_dw);
assert(reg < CIK_UCONFIG_REG_OFFSET);
assert(cs->cdw + 6 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
COPY_DATA_DST_SEL(COPY_DATA_PERF));
radeon_emit(cs, value);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
radeon_emit(cs, value);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0); /* unused */
}
#endif /* RADV_CS_H */
File diff suppressed because it is too large Load Diff
+50 -54
View File
@@ -28,69 +28,65 @@
/* Please keep docs/envvars.rst up-to-date when you add/remove options. */
/* RADV_DEBUG_* option bits, one distinct bit per option (bits 0 through 34).
 * The constants are written as 1ull << n because the highest ones (bits 32
 * and up) do not fit in 32 bits; any variable holding a combination of these
 * therefore needs to be 64 bits wide.
 */
enum {
RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
RADV_DEBUG_NO_DCC = 1ull << 1,
RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
RADV_DEBUG_NO_CACHE = 1ull << 3,
RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
RADV_DEBUG_NO_HIZ = 1ull << 5,
RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
RADV_DEBUG_ALL_BOS = 1ull << 7,
RADV_DEBUG_NO_IBS = 1ull << 8,
RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
RADV_DEBUG_VM_FAULTS = 1ull << 10,
RADV_DEBUG_ZERO_VRAM = 1ull << 11,
RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
RADV_DEBUG_PREOPTIR = 1ull << 13,
RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
RADV_DEBUG_INFO = 1ull << 16,
RADV_DEBUG_ERRORS = 1ull << 17,
RADV_DEBUG_STARTUP = 1ull << 18,
RADV_DEBUG_CHECKIR = 1ull << 19,
RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
RADV_DEBUG_NOBINNING = 1ull << 21,
RADV_DEBUG_NO_NGG = 1ull << 22,
RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
RADV_DEBUG_LLVM = 1ull << 26,
RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
RADV_DEBUG_HANG = 1ull << 28,
RADV_DEBUG_IMG = 1ull << 29,
RADV_DEBUG_NO_UMR = 1ull << 30,
RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
RADV_DEBUG_NO_TC_COMPAT_CMASK= 1ull << 33,
RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
RADV_DEBUG_NO_DCC = 1ull << 1,
RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
RADV_DEBUG_NO_CACHE = 1ull << 3,
RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
RADV_DEBUG_NO_HIZ = 1ull << 5,
RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
RADV_DEBUG_ALL_BOS = 1ull << 7,
RADV_DEBUG_NO_IBS = 1ull << 8,
RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
RADV_DEBUG_VM_FAULTS = 1ull << 10,
RADV_DEBUG_ZERO_VRAM = 1ull << 11,
RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
RADV_DEBUG_PREOPTIR = 1ull << 13,
RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
RADV_DEBUG_INFO = 1ull << 16,
RADV_DEBUG_ERRORS = 1ull << 17,
RADV_DEBUG_STARTUP = 1ull << 18,
RADV_DEBUG_CHECKIR = 1ull << 19,
RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
RADV_DEBUG_NOBINNING = 1ull << 21,
RADV_DEBUG_NO_NGG = 1ull << 22,
RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
RADV_DEBUG_LLVM = 1ull << 26,
RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
RADV_DEBUG_HANG = 1ull << 28,
RADV_DEBUG_IMG = 1ull << 29,
RADV_DEBUG_NO_UMR = 1ull << 30,
RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 33,
RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
};
/* RADV_PERFTEST_* option bits, one distinct bit per option (bits 0 through
 * 10), written as 1u << n so every value fits in 32 bits.
 */
enum {
RADV_PERFTEST_LOCAL_BOS = 1u << 0,
RADV_PERFTEST_DCC_MSAA = 1u << 1,
RADV_PERFTEST_BO_LIST = 1u << 2,
RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
RADV_PERFTEST_DFSM = 1u << 7,
RADV_PERFTEST_NO_SAM = 1u << 8,
RADV_PERFTEST_SAM = 1u << 9,
RADV_PERFTEST_DCC_STORES = 1u << 10,
RADV_PERFTEST_LOCAL_BOS = 1u << 0,
RADV_PERFTEST_DCC_MSAA = 1u << 1,
RADV_PERFTEST_BO_LIST = 1u << 2,
RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
RADV_PERFTEST_DFSM = 1u << 7,
RADV_PERFTEST_NO_SAM = 1u << 8,
RADV_PERFTEST_SAM = 1u << 9,
RADV_PERFTEST_DCC_STORES = 1u << 10,
};
bool
radv_init_trace(struct radv_device *device);
bool radv_init_trace(struct radv_device *device);
void radv_finish_trace(struct radv_device *device);
void
radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
void radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp);
void radv_print_spirv(const char *data, uint32_t size, FILE *fp);
void
radv_dump_enabled_options(struct radv_device *device, FILE *f);
void radv_dump_enabled_options(struct radv_device *device, FILE *f);
bool radv_trap_handler_init(struct radv_device *device);
void radv_trap_handler_finish(struct radv_device *device);
File diff suppressed because it is too large Load Diff
+15 -12
View File
@@ -104,27 +104,30 @@ struct radv_pipeline_layout {
/* Return a pointer located binding->immutable_samplers_offset bytes past the
 * start of the set layout object `set`, typed as const uint32_t *.
 * The offset is applied unconditionally; a zero offset is not treated
 * specially here.
 */
static inline const uint32_t *
radv_immutable_samplers(const struct radv_descriptor_set_layout *set,
const struct radv_descriptor_set_binding_layout *binding) {
return (const uint32_t*)((const char*)set + binding->immutable_samplers_offset);
const struct radv_descriptor_set_binding_layout *binding)
{
return (const uint32_t *)((const char *)set + binding->immutable_samplers_offset);
}
/* Return binding->size minus 16 when binding->immutable_samplers_equal is
 * false, and binding->size unchanged when it is true.
 * NOTE(review): the 16 is presumably the size in bytes of the sampler part
 * of a combined image+sampler descriptor - confirm against the layout code.
 */
static inline unsigned
radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding)
radv_combined_image_descriptor_sampler_offset(
const struct radv_descriptor_set_binding_layout *binding)
{
return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
}
/* Look up the ycbcr conversion data stored inside the set layout object for
 * binding `binding_index`.
 *
 * The layout holds a table of uint32_t byte offsets, itself located
 * set->ycbcr_sampler_offsets_offset bytes past the start of `set`.
 * Returns NULL when the layout has no such table (offset field is 0) or when
 * the table entry for `binding_index` is 0; otherwise returns the address
 * `set` + offsets[binding_index].
 * `binding_index` is not range-checked here.
 */
static inline const struct radv_sampler_ycbcr_conversion *
radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set,
unsigned binding_index)
radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsigned binding_index)
{
if (!set->ycbcr_sampler_offsets_offset)
return NULL;
if (!set->ycbcr_sampler_offsets_offset)
return NULL;
const uint32_t *offsets = (const uint32_t*)((const char*)set + set->ycbcr_sampler_offsets_offset);
const uint32_t *offsets =
(const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset);
if (offsets[binding_index] == 0)
return NULL;
return (const struct radv_sampler_ycbcr_conversion *)((const char*)set + offsets[binding_index]);
if (offsets[binding_index] == 0)
return NULL;
return (const struct radv_sampler_ycbcr_conversion *)((const char *)set +
offsets[binding_index]);
}
#endif /* RADV_DESCRIPTOR_SET_H */
+6088 -6444
View File
File diff suppressed because it is too large Load Diff
+1595 -1647
View File
File diff suppressed because it is too large Load Diff
+1685 -1841
View File
File diff suppressed because it is too large Load Diff
+90 -96
View File
@@ -20,131 +20,125 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "radv_shader_helper.h"
#include <list>
/* One LLVM compiler instance plus its pass objects for a given
 * (family, tm_options, wave_size) combination.
 *
 * llvm_info is public so callers can copy it out and compare its `tm`
 * member; the key fields and the pass pointers are private.
 */
class radv_llvm_per_thread_info {
public:
/* Only records the key and nulls the pass pointers; init() does the work. */
radv_llvm_per_thread_info(enum radeon_family arg_family,
enum ac_target_machine_options arg_tm_options,
unsigned arg_wave_size)
: family(arg_family), tm_options(arg_tm_options),
wave_size(arg_wave_size), passes(NULL), passes_wave32(NULL) {}
public:
radv_llvm_per_thread_info(enum radeon_family arg_family,
enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size)
: family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL),
passes_wave32(NULL)
{
}
/* Destroys llvm_info.
 * NOTE(review): `passes` and `passes_wave32` are not released here; confirm
 * whether ac_destroy_llvm_compiler() covers them or whether they leak.
 */
~radv_llvm_per_thread_info()
{
ac_destroy_llvm_compiler(&llvm_info);
}
~radv_llvm_per_thread_info()
{
ac_destroy_llvm_compiler(&llvm_info);
}
/* Initialize llvm_info, then create `passes` from llvm_info.tm and, when
 * llvm_info.tm_wave32 is set, `passes_wave32` from it.
 * Returns false as soon as any step fails, true otherwise.
 */
bool init(void)
{
if (!ac_init_llvm_compiler(&llvm_info,
family,
tm_options))
return false;
bool init(void)
{
if (!ac_init_llvm_compiler(&llvm_info, family, tm_options))
return false;
passes = ac_create_llvm_passes(llvm_info.tm);
if (!passes)
return false;
passes = ac_create_llvm_passes(llvm_info.tm);
if (!passes)
return false;
if (llvm_info.tm_wave32) {
passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
if (!passes_wave32)
return false;
}
if (llvm_info.tm_wave32) {
passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
if (!passes_wave32)
return false;
}
return true;
}
return true;
}
/* Compile `module` to ELF using passes_wave32 when wave_size == 32 and
 * `passes` otherwise; returns ac_compile_module_to_elf()'s result.
 * NOTE(review): passes_wave32 stays NULL when llvm_info.tm_wave32 was unset
 * in init(); confirm wave_size 32 is never requested in that situation.
 */
bool compile_to_memory_buffer(LLVMModuleRef module,
char **pelf_buffer, size_t *pelf_size)
{
struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
}
bool compile_to_memory_buffer(LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size)
{
struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
}
/* True when all three key fields match this entry. */
bool is_same(enum radeon_family arg_family,
enum ac_target_machine_options arg_tm_options,
unsigned arg_wave_size) {
if (arg_family == family &&
arg_tm_options == tm_options &&
arg_wave_size == wave_size)
return true;
return false;
}
struct ac_llvm_compiler llvm_info;
private:
enum radeon_family family;
enum ac_target_machine_options tm_options;
unsigned wave_size;
struct ac_compiler_passes *passes;
struct ac_compiler_passes *passes_wave32;
bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options,
unsigned arg_wave_size)
{
if (arg_family == family && arg_tm_options == tm_options && arg_wave_size == wave_size)
return true;
return false;
}
struct ac_llvm_compiler llvm_info;
private:
enum radeon_family family;
enum ac_target_machine_options tm_options;
unsigned wave_size;
struct ac_compiler_passes *passes;
struct ac_compiler_passes *passes_wave32;
};
/* we have to store a linked list per thread due to the possibility of multiple GPUs being required */
static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
/* Compile `module` to an ELF image returned through pelf_buffer/pelf_size.
 *
 * The calling thread's radv_llvm_per_thread_list is searched for an entry
 * whose llvm_info.tm equals info->tm. If one exists, its cached passes are
 * used. Otherwise a temporary pass object is created from info->tm, used for
 * this one compile and destroyed again.
 * Returns the result of the underlying compile call.
 *
 * NOTE(review): in the fallback path the result of ac_create_llvm_passes()
 * is used without a NULL check, whereas the per-thread init() does check it.
 */
bool radv_compile_to_elf(struct ac_llvm_compiler *info,
LLVMModuleRef module,
char **pelf_buffer, size_t *pelf_size)
bool
radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
size_t *pelf_size)
{
radv_llvm_per_thread_info *thread_info = nullptr;
radv_llvm_per_thread_info *thread_info = nullptr;
for (auto &I : radv_llvm_per_thread_list) {
if (I.llvm_info.tm == info->tm) {
thread_info = &I;
break;
}
}
for (auto &I : radv_llvm_per_thread_list) {
if (I.llvm_info.tm == info->tm) {
thread_info = &I;
break;
}
}
if (!thread_info) {
struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
ac_destroy_llvm_passes(passes);
return ret;
}
if (!thread_info) {
struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
ac_destroy_llvm_passes(passes);
return ret;
}
return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
}
/* Fill `*info` with an LLVM compiler for (family, tm_options, wave_size).
 *
 * thread_compiler == true: reuse the calling thread's cached entry with the
 * same key if there is one. Otherwise append a new entry to
 * radv_llvm_per_thread_list and init() it; on failure the entry is popped
 * again and false is returned. In both success cases `*info` receives a
 * struct copy of the entry's llvm_info, so the list entry stays the owner.
 *
 * thread_compiler == false: `*info` is initialized directly with
 * ac_init_llvm_compiler() and wave_size is not used.
 *
 * Returns true on success, false on failure.
 */
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
bool thread_compiler,
enum radeon_family family,
enum ac_target_machine_options tm_options,
unsigned wave_size)
bool
radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
enum radeon_family family, enum ac_target_machine_options tm_options,
unsigned wave_size)
{
if (thread_compiler) {
for (auto &I : radv_llvm_per_thread_list) {
if (I.is_same(family, tm_options, wave_size)) {
*info = I.llvm_info;
return true;
}
}
if (thread_compiler) {
for (auto &I : radv_llvm_per_thread_list) {
if (I.is_same(family, tm_options, wave_size)) {
*info = I.llvm_info;
return true;
}
}
radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
if (!tinfo.init()) {
radv_llvm_per_thread_list.pop_back();
return false;
}
if (!tinfo.init()) {
radv_llvm_per_thread_list.pop_back();
return false;
}
*info = tinfo.llvm_info;
return true;
}
*info = tinfo.llvm_info;
return true;
}
if (!ac_init_llvm_compiler(info,
family,
tm_options))
return false;
return true;
if (!ac_init_llvm_compiler(info, family, tm_options))
return false;
return true;
}
/* Destroy `info` only when it is not a per-thread compiler.
 * For thread_compiler == true this is a no-op: `info` is then a copy of a
 * radv_llvm_per_thread_list entry, and that entry's destructor is what calls
 * ac_destroy_llvm_compiler().
 */
void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
bool thread_compiler)
void
radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler)
{
if (!thread_compiler)
ac_destroy_llvm_compiler(info);
if (!thread_compiler)
ac_destroy_llvm_compiler(info);
}
+430 -447
View File
File diff suppressed because it is too large Load Diff
+123 -152
View File
@@ -34,62 +34,62 @@ extern "C" {
#endif
/* Bit flags (bits 0 through 5), one per category of state.
 * NOTE(review): presumably these are the values accepted by the `flags`
 * argument of radv_meta_save() - confirm against its implementation.
 */
enum radv_meta_save_flags {
RADV_META_SAVE_PASS = (1 << 0),
RADV_META_SAVE_CONSTANTS = (1 << 1),
RADV_META_SAVE_DESCRIPTORS = (1 << 2),
RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
RADV_META_SAVE_PASS = (1 << 0),
RADV_META_SAVE_CONSTANTS = (1 << 1),
RADV_META_SAVE_DESCRIPTORS = (1 << 2),
RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
};
/* Snapshot container passed to radv_meta_save() / radv_meta_restore().
 *
 * It holds pointers to the previous descriptor set 0, pipeline, render pass,
 * subpass, attachments and framebuffer, by-value copies of viewport, scissor
 * and sample-location state, a 128-byte push-constant buffer, and assorted
 * rasterization, depth, stencil and fragment-shading-rate values.
 * NOTE(review): `flags` is presumably a mask of radv_meta_save_flags bits
 * recording which parts are valid - confirm in radv_meta_save().
 */
struct radv_meta_saved_state {
uint32_t flags;
uint32_t flags;
struct radv_descriptor_set *old_descriptor_set0;
struct radv_pipeline *old_pipeline;
struct radv_viewport_state viewport;
struct radv_scissor_state scissor;
struct radv_sample_locations_state sample_location;
struct radv_descriptor_set *old_descriptor_set0;
struct radv_pipeline *old_pipeline;
struct radv_viewport_state viewport;
struct radv_scissor_state scissor;
struct radv_sample_locations_state sample_location;
char push_constants[128];
char push_constants[128];
struct radv_render_pass *pass;
const struct radv_subpass *subpass;
struct radv_attachment_state *attachments;
struct radv_framebuffer *framebuffer;
VkRect2D render_area;
struct radv_render_pass *pass;
const struct radv_subpass *subpass;
struct radv_attachment_state *attachments;
struct radv_framebuffer *framebuffer;
VkRect2D render_area;
VkCullModeFlags cull_mode;
VkFrontFace front_face;
VkCullModeFlags cull_mode;
VkFrontFace front_face;
unsigned primitive_topology;
unsigned primitive_topology;
bool depth_test_enable;
bool depth_write_enable;
unsigned depth_compare_op;
bool depth_bounds_test_enable;
bool stencil_test_enable;
bool depth_test_enable;
bool depth_write_enable;
unsigned depth_compare_op;
bool depth_bounds_test_enable;
bool stencil_test_enable;
/* Stencil operations, kept separately for front and back faces. */
struct {
struct {
VkStencilOp fail_op;
VkStencilOp pass_op;
VkStencilOp depth_fail_op;
VkCompareOp compare_op;
} front;
struct {
struct {
VkStencilOp fail_op;
VkStencilOp pass_op;
VkStencilOp depth_fail_op;
VkCompareOp compare_op;
} front;
struct {
VkStencilOp fail_op;
VkStencilOp pass_op;
VkStencilOp depth_fail_op;
VkCompareOp compare_op;
} back;
} stencil_op;
struct {
VkStencilOp fail_op;
VkStencilOp pass_op;
VkStencilOp depth_fail_op;
VkCompareOp compare_op;
} back;
} stencil_op;
/* Fragment shading rate: a size plus two combiner ops. */
struct {
VkExtent2D size;
VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
} fragment_shading_rate;
struct {
VkExtent2D size;
VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
} fragment_shading_rate;
};
VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
@@ -128,135 +128,111 @@ void radv_device_finish_meta_fmask_expand_state(struct radv_device *device);
VkResult radv_device_init_meta_dcc_retile_state(struct radv_device *device);
void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
void radv_meta_save(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer, uint32_t flags);
void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
uint32_t flags);
void radv_meta_restore(const struct radv_meta_saved_state *state,
struct radv_cmd_buffer *cmd_buffer);
struct radv_cmd_buffer *cmd_buffer);
VkImageViewType radv_meta_get_view_type(const struct radv_image *image);
uint32_t radv_meta_get_iview_layer(const struct radv_image *dest_image,
const VkImageSubresourceLayers *dest_subresource,
const VkOffset3D *dest_offset);
const VkImageSubresourceLayers *dest_subresource,
const VkOffset3D *dest_offset);
/* Image-side operand of the radv_meta_blit2d() family of helpers: an image
 * together with the level, layer, aspect mask and current layout to use,
 * plus the element size and format to access it with.
 */
struct radv_meta_blit2d_surf {
/** The size of an element in bytes. */
uint8_t bs;
VkFormat format;
/** The size of an element in bytes. */
uint8_t bs;
VkFormat format;
struct radv_image *image;
unsigned level;
unsigned layer;
VkImageAspectFlags aspect_mask;
VkImageLayout current_layout;
bool disable_compression;
struct radv_image *image;
unsigned level;
unsigned layer;
VkImageAspectFlags aspect_mask;
VkImageLayout current_layout;
bool disable_compression;
};
/* Buffer-side operand of the radv_meta_blit2d() family of helpers: a buffer
 * plus an offset, a pitch, an element size (`bs`) and a format.
 * NOTE(review): units of `offset` and `pitch` (bytes vs. elements) are not
 * visible here - confirm against the blit2d implementation.
 */
struct radv_meta_blit2d_buffer {
struct radv_buffer *buffer;
uint32_t offset;
uint32_t pitch;
uint8_t bs;
VkFormat format;
struct radv_buffer *buffer;
uint32_t offset;
uint32_t pitch;
uint8_t bs;
VkFormat format;
};
/* One copy rectangle: source origin, destination origin and a single
 * width/height shared by both sides.
 */
struct radv_meta_blit2d_rect {
uint32_t src_x, src_y;
uint32_t dst_x, dst_y;
uint32_t width, height;
uint32_t src_x, src_y;
uint32_t dst_x, dst_y;
uint32_t width, height;
};
void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_saved_state *save);
void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src_img,
struct radv_meta_blit2d_buffer *src_buf,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_saved_state *save);
void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
unsigned num_rects, struct radv_meta_blit2d_rect *rects);
void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device);
void radv_device_finish_meta_bufimage_state(struct radv_device *device);
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_buffer *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *dst,
const VkClearColorValue *clear_color);
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
const VkClearColorValue *clear_color);
void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs);
void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs);
void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs);
void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs);
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image);
void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkFormat src_format,
VkImageLayout src_image_layout,
struct radv_image *dest_image,
VkFormat dest_format,
VkImageLayout dest_image_layout,
const VkImageResolve2KHR *region);
struct radv_image *src_image, VkFormat src_format,
VkImageLayout src_image_layout, struct radv_image *dest_image,
VkFormat dest_format, VkImageLayout dest_image_layout,
const VkImageResolve2KHR *region);
void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
struct radv_image *dest_image,
VkImageLayout dest_image_layout,
const VkImageResolve2KHR *region);
struct radv_image *src_image, VkImageLayout src_image_layout,
struct radv_image *dest_image,
VkImageLayout dest_image_layout,
const VkImageResolve2KHR *region);
void radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer);
void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
const VkImageResolve2KHR *region);
void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
VkImageLayout src_image_layout, const VkImageResolve2KHR *region);
uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
const struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
/**
* Return whether the bound pipeline is the FMASK decompress pass.
@@ -264,11 +240,11 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
static inline bool
radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
return radv_pipeline_to_handle(pipeline) ==
meta_state->fast_clear_flush.fmask_decompress_pipeline;
return radv_pipeline_to_handle(pipeline) ==
meta_state->fast_clear_flush.fmask_decompress_pipeline;
}
/**
@@ -277,11 +253,10 @@ radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
static inline bool
radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
return radv_pipeline_to_handle(pipeline) ==
meta_state->fast_clear_flush.dcc_decompress_pipeline;
return radv_pipeline_to_handle(pipeline) == meta_state->fast_clear_flush.dcc_decompress_pipeline;
}
/* common nir builder helpers */
@@ -292,15 +267,11 @@ nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *c
nir_shader *radv_meta_build_nir_vs_generate_vertices(void);
nir_shader *radv_meta_build_nir_fs_noop(void);
void radv_meta_build_resolve_shader_core(nir_builder *b,
bool is_integer,
int samples,
nir_variable *input_img,
nir_variable *color,
nir_ssa_def *img_coord);
void radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color,
nir_ssa_def *img_coord);
nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set,
unsigned binding);
nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
#ifdef __cplusplus
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+385 -462
View File
@@ -1,527 +1,450 @@
#include "radv_meta.h"
#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "sid.h"
#include "radv_cs.h"
#include "sid.h"
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
"meta_buffer_fill");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_fill");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size =
nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
offset = nir_channel(&b, offset, 0);
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
offset = nir_channel(&b, offset, 0);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=4);
nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]) { 0, 0, 0, 0}, 4);
nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]){0, 0, 0, 0}, 4);
nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask=0xf,
.access=ACCESS_NON_READABLE, .align_mul=16);
nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask = 0xf,
.access = ACCESS_NON_READABLE, .align_mul = 16);
return b.shader;
return b.shader;
}
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
"meta_buffer_copy");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_copy");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size =
nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
offset = nir_channel(&b, offset, 0);
nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
offset = nir_channel(&b, offset, 0);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul=16);
nir_store_ssbo(&b, load, dst_buf, offset, .write_mask=0xf,
.access=ACCESS_NON_READABLE, .align_mul=16);
nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul = 16);
nir_store_ssbo(&b, load, dst_buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,
.align_mul = 16);
return b.shader;
return b.shader;
}
VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
VkResult
radv_device_init_meta_buffer_state(struct radv_device *device)
{
VkResult result;
nir_shader *fill_cs = build_buffer_fill_shader(device);
nir_shader *copy_cs = build_buffer_copy_shader(device);
VkResult result;
nir_shader *fill_cs = build_buffer_fill_shader(device);
nir_shader *copy_cs = build_buffer_copy_shader(device);
VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]){
{.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&fill_ds_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.fill_ds_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &fill_ds_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.fill_ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]){
{.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
{.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&copy_ds_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.copy_ds_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &copy_ds_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.copy_ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo fill_pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
};
VkPipelineLayoutCreateInfo fill_pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.fill_p_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&fill_pl_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.fill_p_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo copy_pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
.pushConstantRangeCount = 0,
};
VkPipelineLayoutCreateInfo copy_pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
.pushConstantRangeCount = 0,
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.copy_p_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&copy_pl_create_info,
&device->meta_state.alloc,
&device->meta_state.buffer.copy_p_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(fill_cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(fill_cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo fill_vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = fill_pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.buffer.fill_p_layout,
};
VkComputePipelineCreateInfo fill_vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = fill_pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.buffer.fill_p_layout,
};
result = radv_CreateComputePipelines(
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
&fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &fill_vk_pipeline_info, NULL,
&device->meta_state.buffer.fill_pipeline);
if (result != VK_SUCCESS)
goto fail;
VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(copy_cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(copy_cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo copy_vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = copy_pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.buffer.copy_p_layout,
};
VkComputePipelineCreateInfo copy_vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = copy_pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.buffer.copy_p_layout,
};
result = radv_CreateComputePipelines(
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
&copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &copy_vk_pipeline_info, NULL,
&device->meta_state.buffer.copy_pipeline);
if (result != VK_SUCCESS)
goto fail;
ralloc_free(fill_cs);
ralloc_free(copy_cs);
return VK_SUCCESS;
ralloc_free(fill_cs);
ralloc_free(copy_cs);
return VK_SUCCESS;
fail:
radv_device_finish_meta_buffer_state(device);
ralloc_free(fill_cs);
ralloc_free(copy_cs);
return result;
radv_device_finish_meta_buffer_state(device);
ralloc_free(fill_cs);
ralloc_free(copy_cs);
return result;
}
void radv_device_finish_meta_buffer_state(struct radv_device *device)
void
radv_device_finish_meta_buffer_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
struct radv_meta_state *state = &device->meta_state;
radv_DestroyPipeline(radv_device_to_handle(device),
state->buffer.copy_pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->buffer.fill_pipeline, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->buffer.copy_p_layout, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->buffer.fill_p_layout, &state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->buffer.copy_ds_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->buffer.fill_ds_layout,
&state->alloc);
}
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_bo *bo,
uint64_t offset, uint64_t size, uint32_t value)
{
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE |
RADV_META_SAVE_CONSTANTS |
RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = bo,
.offset = offset,
.size = size
};
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.fill_pipeline);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.fill_p_layout,
0, /* set */
1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo) {
.buffer = radv_buffer_to_handle(&dst_buffer),
.offset = 0,
.range = size
}
}
});
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.buffer.fill_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
&value);
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
}
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_bo *src_bo,
struct radeon_winsys_bo *dst_bo,
uint64_t src_offset, uint64_t dst_offset,
uint64_t size)
{
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE |
RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = dst_bo,
.offset = dst_offset,
.size = size
};
struct radv_buffer src_buffer = {
.bo = src_bo,
.offset = src_offset,
.size = size
};
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.copy_pipeline);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.copy_p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo) {
.buffer = radv_buffer_to_handle(&dst_buffer),
.offset = 0,
.range = size
}
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo) {
.buffer = radv_buffer_to_handle(&src_buffer),
.offset = 0,
.range = size
}
}
});
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
}
/*
 * Decide whether a buffer operation of `size` bytes should use the compute
 * shader path (true) or CP DMA (false).
 *
 * `src_bo` may be NULL (fills have no source); `dst_bo` is always read on
 * GFX10+ parts with dedicated VRAM.
 */
static bool
radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
                        struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
{
   const bool big_enough = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
   const bool gfx10_dgpu = device->physical_device->rad_info.chip_class >= GFX10 &&
                           device->physical_device->rad_info.has_dedicated_vram;

   if (!gfx10_dgpu)
      return big_enough;

   /* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
   if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
       !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))
      return false;

   return big_enough;
}
/*
 * Fill `size` bytes of `bo` at byte `offset` with the 32-bit pattern `value`.
 * Both `offset` and `size` must be multiples of 4 (asserted).
 *
 * radv_prefer_compute_dma() selects between the compute shader and CP DMA.
 * `image` is only forwarded to the radv_*_access_flush() helpers; callers in
 * this file pass NULL for plain buffers.
 *
 * Returns the flush bits computed after a compute-shader fill, or 0 when CP
 * DMA was used or nothing was done.
 */
uint32_t
radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value)
{
   const bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);

   assert(!(offset & 3));
   assert(!(size & 3));

   if (use_compute) {
      cmd_buffer->state.flush_bits |=
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

      fill_buffer_shader(cmd_buffer, bo, offset, size, value);

      return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
             radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
   }

   if (size) {
      const uint64_t va = radv_buffer_get_va(bo) + offset;

      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
      si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
   }

   return 0;
}
/*
 * Copy `size` bytes from `src_bo` + `src_offset` to `dst_bo` + `dst_offset`.
 *
 * The compute shader is only considered when size and both offsets are
 * multiples of 4; otherwise, or when radv_prefer_compute_dma() declines,
 * a non-empty copy goes through CP DMA.
 */
static void
radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
                 struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
                 uint64_t size)
{
   const bool dword_aligned = ((size | src_offset | dst_offset) & 3) == 0;

   if (dword_aligned &&
       radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo)) {
      copy_buffer_shader(cmd_buffer, src_bo, dst_bo, src_offset, dst_offset, size);
      return;
   }

   if (!size)
      return;

   const uint64_t src_va = radv_buffer_get_va(src_bo) + src_offset;
   const uint64_t dst_va = radv_buffer_get_va(dst_bo) + dst_offset;

   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);

   si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
}
void radv_CmdFillBuffer(
VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize fillSize,
uint32_t data)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
if (fillSize == VK_WHOLE_SIZE)
fillSize = (dst_buffer->size - dstOffset) & ~3ull;
radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset,
fillSize, data);
radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
&state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.copy_ds_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.fill_ds_layout,
&state->alloc);
}
static void
copy_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer *src_buffer,
struct radv_buffer *dst_buffer,
const VkBufferCopy2KHR *region)
fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset,
uint64_t size, uint32_t value)
{
bool old_predicating;
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_state saved_state;
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
radv_meta_save(
&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
radv_copy_buffer(cmd_buffer,
src_buffer->bo,
dst_buffer->bo,
src_buffer->offset + region->srcOffset,
dst_buffer->offset + region->dstOffset,
region->size);
struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};
/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.fill_pipeline);
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.fill_p_layout,
0, /* set */
1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
.offset = 0,
.range = size}}});
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
&value);
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdCopyBuffer2KHR(
VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2KHR* pCopyBufferInfo)
static void
copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
uint64_t size)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
struct radv_meta_saved_state saved_state;
for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
copy_buffer(cmd_buffer, src_buffer, dst_buffer,
&pCopyBufferInfo->pRegions[r]);
}
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {.bo = dst_bo, .offset = dst_offset, .size = size};
struct radv_buffer src_buffer = {.bo = src_bo, .offset = src_offset, .size = size};
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.copy_pipeline);
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.copy_p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
.offset = 0,
.range = size}},
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
.offset = 0,
.range = size}}});
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdUpdateBuffer(
VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void* pData)
static bool
radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
uint64_t words = dataSize / 4;
uint64_t va = radv_buffer_get_va(dst_buffer->bo);
va += dstOffset + dst_buffer->offset;
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
assert(!(dataSize & 3));
assert(!(va & 3));
if (device->physical_device->rad_info.chip_class >= GFX10 &&
device->physical_device->rad_info.has_dedicated_vram) {
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
!(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
/* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
use_compute = false;
}
}
if (!dataSize)
return;
if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
si_emit_cache_flush(cmd_buffer);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
V_370_MEM : V_370_MEM_GRBM) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
radeon_emit_array(cmd_buffer->cs, pData, words);
if (unlikely(cmd_buffer->device->trace_bo))
radv_cmd_buffer_trace_emit(cmd_buffer);
} else {
uint32_t buf_offset;
radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
buf_offset, dstOffset + dst_buffer->offset, dataSize);
}
return use_compute;
}
/*
 * Fill `size` bytes of `bo` at byte `offset` with the 32-bit pattern `value`.
 * Both `offset` and `size` must be multiples of 4 (asserted).
 *
 * radv_prefer_compute_dma() selects between the compute shader and CP DMA.
 * `image` is only forwarded to the radv_*_access_flush() helpers; callers in
 * this file pass NULL for plain buffers.
 *
 * Returns the flush bits computed after a compute-shader fill, or 0 when CP
 * DMA was used or nothing was done.
 */
uint32_t
radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value)
{
   const bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);

   assert(!(offset & 3));
   assert(!(size & 3));

   if (use_compute) {
      cmd_buffer->state.flush_bits |=
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

      fill_buffer_shader(cmd_buffer, bo, offset, size, value);

      return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
             radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
   }

   if (size) {
      const uint64_t va = radv_buffer_get_va(bo) + offset;

      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
      si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
   }

   return 0;
}
/**
 * Copy `size` bytes from `src_bo` + `src_offset` to `dst_bo` + `dst_offset`.
 *
 * The compute copy shader is only considered when the size and both offsets
 * are 4-byte aligned and radv_prefer_compute_dma() agrees; every other
 * non-empty copy goes through CP DMA. A zero-sized copy on the CP DMA path
 * emits nothing.
 */
static void
radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
                 struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
                 uint64_t size)
{
   const bool dword_aligned = ((size | src_offset | dst_offset) & 3) == 0;

   if (dword_aligned && radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo)) {
      copy_buffer_shader(cmd_buffer, src_bo, dst_bo, src_offset, dst_offset, size);
      return;
   }

   if (!size)
      return;

   const uint64_t from_va = radv_buffer_get_va(src_bo) + src_offset;
   const uint64_t to_va = radv_buffer_get_va(dst_bo) + dst_offset;

   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);

   si_cp_dma_buffer_copy(cmd_buffer, from_va, to_va, size);
}
/**
 * vkCmdFillBuffer entry point.
 *
 * VK_WHOLE_SIZE is resolved to the remainder of the buffer past `dstOffset`,
 * rounded down to a multiple of 4. The value returned by radv_fill_buffer()
 * is not used here.
 */
void
radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
                   VkDeviceSize fillSize, uint32_t data)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);

   const VkDeviceSize byte_count =
      fillSize == VK_WHOLE_SIZE ? ((dst_buffer->size - dstOffset) & ~3ull) : fillSize;

   radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset, byte_count,
                    data);
}
/**
 * Copy one VkBufferCopy2KHR region from `src_buffer` to `dst_buffer`.
 *
 * Predication is switched off for the duration of the copy and put back
 * afterwards, because VK_EXT_conditional_rendering says that copy commands
 * should not be affected by conditional rendering.
 */
static void
copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer,
            struct radv_buffer *dst_buffer, const VkBufferCopy2KHR *region)
{
   /* Remember the caller's predication state and disable it for the copy. */
   const bool saved_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo,
                    src_buffer->offset + region->srcOffset,
                    dst_buffer->offset + region->dstOffset, region->size);

   /* Restore conditional rendering. */
   cmd_buffer->state.predicating = saved_predicating;
}
/**
 * vkCmdCopyBuffer2KHR entry point: issue one buffer copy per region listed in
 * `pCopyBufferInfo`, in order.
 */
void
radv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   unsigned idx = 0;
   while (idx < pCopyBufferInfo->regionCount) {
      copy_buffer(cmd_buffer, src_buffer, dst_buffer, pCopyBufferInfo->pRegions + idx);
      ++idx;
   }
}
/**
 * vkCmdUpdateBuffer entry point: write `dataSize` bytes from `pData` into
 * `dstBuffer` at `dstOffset`.
 *
 * `dataSize` and the destination address must be 4-byte aligned (asserted).
 * Updates smaller than RADV_BUFFER_UPDATE_THRESHOLD are embedded in the
 * command stream as a WRITE_DATA packet; anything larger is staged with
 * radv_cmd_buffer_upload_data() and copied from the command buffer's upload
 * BO with radv_copy_buffer(). A zero-sized update is a no-op.
 */
void
radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
                     VkDeviceSize dataSize, const void *pData)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
   const bool on_mec = radv_cmd_buffer_uses_mec(cmd_buffer);
   const uint64_t dword_count = dataSize / 4;
   const uint64_t dst_va = radv_buffer_get_va(dst_buffer->bo) + dstOffset + dst_buffer->offset;

   assert(!(dataSize & 3));
   assert(!(dst_va & 3));

   if (dataSize == 0)
      return;

   if (dataSize >= RADV_BUFFER_UPDATE_THRESHOLD) {
      /* Large update: stage the data, then do a regular buffer copy.
       * NOTE(review): the result of radv_cmd_buffer_upload_data() is not
       * checked here - confirm whether it can fail and leave the offset unset.
       */
      uint32_t staging_offset;

      radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &staging_offset);
      radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, staging_offset,
                       dstOffset + dst_buffer->offset, dataSize);
      return;
   }

   /* Small update: inline the payload into the command stream. */
   si_emit_cache_flush(cmd_buffer);

   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

   /* Packet header + control dword + 64-bit address = 4 dwords, then the payload. */
   radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, dword_count + 4);

   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + dword_count, 0));
   radeon_emit(cmd_buffer->cs, S_370_DST_SEL(on_mec ? V_370_MEM : V_370_MEM_GRBM) |
                                  S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
   radeon_emit(cmd_buffer->cs, dst_va);
   radeon_emit(cmd_buffer->cs, dst_va >> 32);
   radeon_emit_array(cmd_buffer->cs, pData, dword_count);

   if (unlikely(cmd_buffer->device->trace_bo))
      radv_cmd_buffer_trace_emit(cmd_buffer);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+396 -432
View File
@@ -27,8 +27,8 @@
/* Returns the block dimensions of the image's format as a VkExtent3D; the
 * depth is always reported as 1.
 */
static VkExtent3D
meta_image_block_size(const struct radv_image *image)
{
   const struct util_format_description *desc = vk_format_description(image->vk_format);
   return (VkExtent3D){desc->block.width, desc->block.height, 1};
}
/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
@@ -36,16 +36,16 @@ meta_image_block_size(const struct radv_image *image)
* if Image is uncompressed or compressed, respectively.
*/
static struct VkExtent3D
meta_region_extent_el(const struct radv_image *image,
const VkImageType imageType,
meta_region_extent_el(const struct radv_image *image, const VkImageType imageType,
const struct VkExtent3D *extent)
{
const VkExtent3D block = meta_image_block_size(image);
return radv_sanitize_image_extent(imageType, (VkExtent3D) {
.width = DIV_ROUND_UP(extent->width , block.width),
.height = DIV_ROUND_UP(extent->height, block.height),
.depth = DIV_ROUND_UP(extent->depth , block.depth),
});
const VkExtent3D block = meta_image_block_size(image);
return radv_sanitize_image_extent(imageType,
(VkExtent3D){
.width = DIV_ROUND_UP(extent->width, block.width),
.height = DIV_ROUND_UP(extent->height, block.height),
.depth = DIV_ROUND_UP(extent->depth, block.depth),
});
}
/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
@@ -53,517 +53,481 @@ meta_region_extent_el(const struct radv_image *image,
* if Image is uncompressed or compressed, respectively.
*/
static struct VkOffset3D
meta_region_offset_el(const struct radv_image *image,
const struct VkOffset3D *offset)
meta_region_offset_el(const struct radv_image *image, const struct VkOffset3D *offset)
{
const VkExtent3D block = meta_image_block_size(image);
return radv_sanitize_image_offset(image->type, (VkOffset3D) {
.x = offset->x / block.width,
.y = offset->y / block.height,
.z = offset->z / block.depth,
});
const VkExtent3D block = meta_image_block_size(image);
return radv_sanitize_image_offset(image->type, (VkOffset3D){
.x = offset->x / block.width,
.y = offset->y / block.height,
.z = offset->z / block.depth,
});
}
/* Returns an unsigned-integer format whose element is `bs` bytes wide.
 * Only the sizes 1, 2, 4, 8, 12 and 16 are valid; any other value is treated
 * as unreachable.
 */
static VkFormat
vk_format_for_size(int bs)
{
   switch (bs) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R8G8_UINT;
   case 4:
      return VK_FORMAT_R8G8B8A8_UINT;
   case 8:
      return VK_FORMAT_R16G16B16A16_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Invalid format block size");
   }
}
/* Builds the blit2d surface description for one mip level / base array layer
 * of `image`, as selected by `subres`, for the given aspect and layout.
 *
 * Unless DCC is enabled for that mip level or the image is TC-compatible
 * HTILE, the aspect format is replaced by the UINT format of the same block
 * size. sRGB is always stripped from the resulting format.
 */
static struct radv_meta_blit2d_surf
blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout,
                                const VkImageSubresourceLayers *subres,
                                VkImageAspectFlags aspect_mask)
{
   VkFormat format = radv_get_aspect_format(image, aspect_mask);

   if (!radv_dcc_enabled(image, subres->mipLevel) && !(radv_image_is_tc_compat_htile(image)))
      format = vk_format_for_size(vk_format_get_blocksize(format));

   format = vk_format_no_srgb(format);

   return (struct radv_meta_blit2d_surf){
      .format = format,
      .bs = vk_format_get_blocksize(format),
      .level = subres->mipLevel,
      .layer = subres->baseArrayLayer,
      .image = image,
      .aspect_mask = aspect_mask,
      .current_layout = layout,
   };
}
/* Reports whether `image` can be used as a render target by the meta copy
 * paths. Two cases return false:
 *  - the three-channel 32-bit formats (R32G32B32 UINT/SINT/SFLOAT);
 *  - on GFX9 and newer, 3D images whose format is compressed with 128-bit
 *    blocks.
 */
bool
radv_image_is_renderable(struct radv_device *device, struct radv_image *image)
{
   if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
       image->vk_format == VK_FORMAT_R32G32B32_SINT ||
       image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
      return false;

   if (device->physical_device->rad_info.chip_class >= GFX9 && image->type == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image->vk_format) == 128 &&
       vk_format_is_compressed(image->vk_format))
      return false;

   return true;
}
/* Copies one VkBufferImageCopy2KHR region from `buffer` into `image`.
 *
 * The compute path is used on the compute queue or when the image is not
 * renderable; otherwise the copy is done with the graphics blit2d path. One
 * blit is issued per 3D slice or array layer, advancing the buffer offset by
 * one full buffer image (row length x image height x block size) each time.
 */
static void
copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
                     struct radv_image *image, VkImageLayout layout,
                     const VkBufferImageCopy2KHR *region)
{
   struct radv_meta_saved_state saved_state;
   bool old_predicating;
   bool cs;

   /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
    * VK_SAMPLE_COUNT_1_BIT."
    */
   assert(image->info.samples == 1);

   cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
        !radv_image_is_renderable(cmd_buffer->device, image);

   radv_meta_save(&saved_state, cmd_buffer,
                  (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
                     RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   /* VK_EXT_conditional_rendering says that copy commands should not be
    * affected by conditional rendering.
    */
   old_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   /**
    * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
    *    imageExtent is the size in texels of the image to copy in width, height
    *    and depth. 1D images use only x and width. 2D images use x, y, width
    *    and height. 3D images use x, y, z, width, height and depth.
    *
    * Also, convert the offsets and extent from units of texels to units of
    * blocks - which is the highest resolution accessible in this command.
    */
   const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);

   /* A zero row length / image height means "tightly packed": fall back to
    * the image extent.
    */
   const VkExtent3D bufferExtent = {
      .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
      .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
   };
   const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);

   /* Start creating blit rect */
   const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
   struct radv_meta_blit2d_rect rect = {
      .width = img_extent_el.width,
      .height = img_extent_el.height,
   };

   /* Create blit surfaces */
   struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer(
      image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);

   if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
      /* The image format cannot be used for the buffer side: decompress DCC
       * if the layout is compressed and fall back to a size-equivalent UINT
       * format.
       */
      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
                                                         cmd_buffer->queue_family_index);
      bool compressed =
         radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
      if (compressed) {
         radv_decompress_dcc(cmd_buffer, image,
                             &(VkImageSubresourceRange){
                                .aspectMask = region->imageSubresource.aspectMask,
                                .baseMipLevel = region->imageSubresource.mipLevel,
                                .levelCount = 1,
                                .baseArrayLayer = region->imageSubresource.baseArrayLayer,
                                .layerCount = region->imageSubresource.layerCount,
                             });
         img_bsurf.disable_compression = true;
      }
      img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
   }

   struct radv_meta_blit2d_buffer buf_bsurf = {
      .bs = img_bsurf.bs,
      .format = img_bsurf.format,
      .buffer = buffer,
      .offset = region->bufferOffset,
      .pitch = buf_extent_el.width,
   };

   if (image->type == VK_IMAGE_TYPE_3D)
      img_bsurf.layer = img_offset_el.z;

   /* Loop through each 3D or array slice */
   unsigned num_slices_3d = img_extent_el.depth;
   unsigned num_slices_array = region->imageSubresource.layerCount;
   unsigned slice_3d = 0;
   unsigned slice_array = 0;
   while (slice_3d < num_slices_3d && slice_array < num_slices_array) {

      rect.dst_x = img_offset_el.x;
      rect.dst_y = img_offset_el.y;

      /* Perform Blit */
      if (cs) {
         radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
      } else {
         radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
      }

      /* Once we've done the blit, all of the actual information about
       * the image is embedded in the command buffer so we can just
       * increment the offset directly in the image effectively
       * re-binding it to different backing memory.
       */
      buf_bsurf.offset += buf_extent_el.width * buf_extent_el.height * buf_bsurf.bs;
      img_bsurf.layer++;
      if (image->type == VK_IMAGE_TYPE_3D)
         slice_3d++;
      else
         slice_array++;
   }

   /* Restore conditional rendering. */
   cmd_buffer->state.predicating = old_predicating;

   radv_meta_restore(&saved_state, cmd_buffer);
}
/* vkCmdCopyBufferToImage2KHR entry point: performs one buffer-to-image copy
 * per region listed in `pCopyBufferToImageInfo`, in order.
 */
void
radv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r]);
   }
}
/* Copies one VkBufferImageCopy2KHR region from `image` into `buffer`.
 *
 * This direction always saves the compute pipeline state and copies with
 * radv_meta_image_to_buffer(). One copy is issued per 3D slice or array
 * layer, advancing the buffer offset by one full buffer image (row length x
 * image height x block size) each time.
 */
static void
copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
                     struct radv_image *image, VkImageLayout layout,
                     const VkBufferImageCopy2KHR *region)
{
   struct radv_meta_saved_state saved_state;
   bool old_predicating;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   /* VK_EXT_conditional_rendering says that copy commands should not be
    * affected by conditional rendering.
    */
   old_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   /**
    * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
    *    imageExtent is the size in texels of the image to copy in width, height
    *    and depth. 1D images use only x and width. 2D images use x, y, width
    *    and height. 3D images use x, y, z, width, height and depth.
    *
    * Also, convert the offsets and extent from units of texels to units of
    * blocks - which is the highest resolution accessible in this command.
    */
   const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);

   /* A zero row length / image height means "tightly packed": fall back to
    * the image extent.
    */
   const VkExtent3D bufferExtent = {
      .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
      .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
   };
   const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);

   /* Start creating blit rect */
   const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
   struct radv_meta_blit2d_rect rect = {
      .width = img_extent_el.width,
      .height = img_extent_el.height,
   };

   /* Create blit surfaces */
   struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer(
      image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);

   if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
      /* The image format cannot be used for the buffer side: decompress DCC
       * if the layout is compressed and fall back to a size-equivalent UINT
       * format.
       */
      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
                                                         cmd_buffer->queue_family_index);
      bool compressed =
         radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
      if (compressed) {
         radv_decompress_dcc(cmd_buffer, image,
                             &(VkImageSubresourceRange){
                                .aspectMask = region->imageSubresource.aspectMask,
                                .baseMipLevel = region->imageSubresource.mipLevel,
                                .levelCount = 1,
                                .baseArrayLayer = region->imageSubresource.baseArrayLayer,
                                .layerCount = region->imageSubresource.layerCount,
                             });
         img_info.disable_compression = true;
      }
      img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
   }

   struct radv_meta_blit2d_buffer buf_info = {
      .bs = img_info.bs,
      .format = img_info.format,
      .buffer = buffer,
      .offset = region->bufferOffset,
      .pitch = buf_extent_el.width,
   };

   if (image->type == VK_IMAGE_TYPE_3D)
      img_info.layer = img_offset_el.z;

   /* Loop through each 3D or array slice */
   unsigned num_slices_3d = img_extent_el.depth;
   unsigned num_slices_array = region->imageSubresource.layerCount;
   unsigned slice_3d = 0;
   unsigned slice_array = 0;
   while (slice_3d < num_slices_3d && slice_array < num_slices_array) {

      rect.src_x = img_offset_el.x;
      rect.src_y = img_offset_el.y;

      /* Perform Blit */
      radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect);

      /* Step to the next slice/layer on both the buffer and the image side. */
      buf_info.offset += buf_extent_el.width * buf_extent_el.height * buf_info.bs;
      img_info.layer++;
      if (image->type == VK_IMAGE_TYPE_3D)
         slice_3d++;
      else
         slice_array++;
   }

   /* Restore conditional rendering. */
   cmd_buffer->state.predicating = old_predicating;

   radv_meta_restore(&saved_state, cmd_buffer);
}
/* vkCmdCopyImageToBuffer2KHR entry point: performs one image-to-buffer copy
 * per region listed in `pCopyImageToBufferInfo`, in order.
 */
void
radv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
   RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r]);
   }
}
static void
copy_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
struct radv_image *dst_image,
VkImageLayout dst_image_layout,
const VkImageCopy2KHR *region)
copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
VkImageLayout src_image_layout, struct radv_image *dst_image,
VkImageLayout dst_image_layout, const VkImageCopy2KHR *region)
{
struct radv_meta_saved_state saved_state;
bool old_predicating;
bool cs;
struct radv_meta_saved_state saved_state;
bool old_predicating;
bool cs;
/* From the Vulkan 1.0 spec:
*
* vkCmdCopyImage can be used to copy image data between multisample
* images, but both images must have the same number of samples.
*/
assert(src_image->info.samples == dst_image->info.samples);
/* From the Vulkan 1.0 spec:
*
* vkCmdCopyImage can be used to copy image data between multisample
* images, but both images must have the same number of samples.
*/
assert(src_image->info.samples == dst_image->info.samples);
cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
!radv_image_is_renderable(cmd_buffer->device, dst_image);
cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
!radv_image_is_renderable(cmd_buffer->device, dst_image);
radv_meta_save(&saved_state, cmd_buffer,
(cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
RADV_META_SAVE_GRAPHICS_PIPELINE) |
RADV_META_SAVE_CONSTANTS |
RADV_META_SAVE_DESCRIPTORS);
radv_meta_save(&saved_state, cmd_buffer,
(cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
unsigned aspect_count = region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
src_aspects[0] = region->srcSubresource.aspectMask;
if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
dst_aspects[0] = region->dstSubresource.aspectMask;
VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
VK_IMAGE_ASPECT_PLANE_2_BIT};
VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
VK_IMAGE_ASPECT_PLANE_2_BIT};
unsigned aspect_count =
region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
src_aspects[0] = region->srcSubresource.aspectMask;
if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
dst_aspects[0] = region->dstSubresource.aspectMask;
for (unsigned a = 0; a < aspect_count; ++a) {
/* Create blit surfaces */
struct radv_meta_blit2d_surf b_src =
blit_surf_for_image_level_layer(src_image,
src_image_layout,
&region->srcSubresource,
src_aspects[a]);
for (unsigned a = 0; a < aspect_count; ++a) {
/* Create blit surfaces */
struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer(
src_image, src_image_layout, &region->srcSubresource, src_aspects[a]);
struct radv_meta_blit2d_surf b_dst =
blit_surf_for_image_level_layer(dst_image,
dst_image_layout,
&region->dstSubresource,
dst_aspects[a]);
struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer(
dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);
uint32_t dst_queue_mask = radv_image_queue_family_mask(dst_image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, dst_image_layout, false, dst_queue_mask);
uint32_t src_queue_mask = radv_image_queue_family_mask(src_image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, src_image_layout, false, src_queue_mask);
uint32_t dst_queue_mask = radv_image_queue_family_mask(
dst_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image,
dst_image_layout, false, dst_queue_mask);
uint32_t src_queue_mask = radv_image_queue_family_mask(
src_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image,
src_image_layout, false, src_queue_mask);
if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
b_src.format = b_dst.format;
} else if (!dst_compressed) {
b_dst.format = b_src.format;
} else {
radv_decompress_dcc(cmd_buffer, dst_image, &(VkImageSubresourceRange) {
.aspectMask = dst_aspects[a],
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->dstSubresource.baseArrayLayer,
.layerCount = region->dstSubresource.layerCount,
});
b_dst.format = b_src.format;
b_dst.disable_compression = true;
}
if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
b_src.format = b_dst.format;
} else if (!dst_compressed) {
b_dst.format = b_src.format;
} else {
radv_decompress_dcc(cmd_buffer, dst_image,
&(VkImageSubresourceRange){
.aspectMask = dst_aspects[a],
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = region->dstSubresource.baseArrayLayer,
.layerCount = region->dstSubresource.layerCount,
});
b_dst.format = b_src.format;
b_dst.disable_compression = true;
}
/**
* From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
* imageExtent is the size in texels of the image to copy in width, height
* and depth. 1D images use only x and width. 2D images use x, y, width
* and height. 3D images use x, y, z, width, height and depth.
*
* Also, convert the offsets and extent from units of texels to units of
* blocks - which is the highest resolution accessible in this command.
*/
const VkOffset3D dst_offset_el = meta_region_offset_el(dst_image, &region->dstOffset);
const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &region->srcOffset);
/**
* From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
* imageExtent is the size in texels of the image to copy in width, height
* and depth. 1D images use only x and width. 2D images use x, y, width
* and height. 3D images use x, y, z, width, height and depth.
*
* Also, convert the offsets and extent from units of texels to units of
* blocks - which is the highest resolution accessible in this command.
*/
const VkOffset3D dst_offset_el =
meta_region_offset_el(dst_image, &region->dstOffset);
const VkOffset3D src_offset_el =
meta_region_offset_el(src_image, &region->srcOffset);
/*
* From Vulkan 1.0.68, "Copying Data Between Images":
* "When copying between compressed and uncompressed formats
* the extent members represent the texel dimensions of the
* source image and not the destination."
* However, we must use the destination image type to avoid
* clamping depth when copying multiple layers of a 2D image to
* a 3D image.
*/
const VkExtent3D img_extent_el =
meta_region_extent_el(src_image, dst_image->type, &region->extent);
/*
* From Vulkan 1.0.68, "Copying Data Between Images":
* "When copying between compressed and uncompressed formats
* the extent members represent the texel dimensions of the
* source image and not the destination."
* However, we must use the destination image type to avoid
* clamping depth when copying multiple layers of a 2D image to
* a 3D image.
*/
const VkExtent3D img_extent_el =
meta_region_extent_el(src_image, dst_image->type, &region->extent);
/* Start creating blit rect */
struct radv_meta_blit2d_rect rect = {
.width = img_extent_el.width,
.height = img_extent_el.height,
};
/* Start creating blit rect */
struct radv_meta_blit2d_rect rect = {
.width = img_extent_el.width,
.height = img_extent_el.height,
};
if (src_image->type == VK_IMAGE_TYPE_3D)
b_src.layer = src_offset_el.z;
if (src_image->type == VK_IMAGE_TYPE_3D)
b_src.layer = src_offset_el.z;
if (dst_image->type == VK_IMAGE_TYPE_3D)
b_dst.layer = dst_offset_el.z;
if (dst_image->type == VK_IMAGE_TYPE_3D)
b_dst.layer = dst_offset_el.z;
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = region->dstSubresource.layerCount;
unsigned slice_3d = 0;
unsigned slice_array = 0;
while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = region->dstSubresource.layerCount;
unsigned slice_3d = 0;
unsigned slice_array = 0;
while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
/* Finish creating blit rect */
rect.dst_x = dst_offset_el.x;
rect.dst_y = dst_offset_el.y;
rect.src_x = src_offset_el.x;
rect.src_y = src_offset_el.y;
/* Finish creating blit rect */
rect.dst_x = dst_offset_el.x;
rect.dst_y = dst_offset_el.y;
rect.src_x = src_offset_el.x;
rect.src_y = src_offset_el.y;
/* Perform Blit */
if (cs) {
radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
} else {
radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
}
/* Perform Blit */
if (cs) {
radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
} else {
radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
}
b_src.layer++;
b_dst.layer++;
if (dst_image->type == VK_IMAGE_TYPE_3D)
slice_3d++;
else
slice_array++;
}
}
b_src.layer++;
b_dst.layer++;
if (dst_image->type == VK_IMAGE_TYPE_3D)
slice_3d++;
else
slice_array++;
}
}
/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
radv_meta_restore(&saved_state, cmd_buffer);
radv_meta_restore(&saved_state, cmd_buffer);
}
/*
 * Driver-side handler for the CopyImage2KHR command (presumably reached via
 * vkCmdCopyImage2KHR -- confirm against the dispatch table).
 *
 * Unwraps the command buffer and the source/destination image handles from
 * pCopyImageInfo, then calls copy_image() once for each of the regionCount
 * entries in pRegions, forwarding the source and destination layouts as-is.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
void radv_CmdCopyImage2KHR(
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2KHR* pCopyImageInfo)
void
radv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
/* Regions are processed independently, in array order. */
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
copy_image(cmd_buffer,
src_image, pCopyImageInfo->srcImageLayout,
dst_image, pCopyImageInfo->dstImageLayout,
&pCopyImageInfo->pRegions[r]);
}
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image,
pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]);
}
}
+212 -236
View File
@@ -21,295 +21,271 @@
* IN THE SOFTWARE.
*/
#include "radv_private.h"
#include "radv_meta.h"
#include "radv_private.h"
/*
 * Builds the NIR compute shader named "dcc_retile_compute" and returns it.
 *
 * The shader declares three uniform buffer-image variables on descriptor
 * set 0: "indices_in" (binding 0), "dcc_in" (binding 1) and "dcc_out"
 * (binding 2). Each invocation loads a 2-component entry from indices_in at
 * its global id, uses one component as the coordinate to load from dcc_in
 * and the other as the coordinate to store to dcc_out.
 *
 * The `dev` parameter is not referenced in the body.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev)
{
const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
false,
GLSL_TYPE_UINT);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_UINT);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
/* Workgroup size is 256 x 1 x 1. */
b.shader->info.cs.local_size[0] = 256;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
b.shader->info.cs.local_size[0] = 256;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
/* Shader resources: all on descriptor set 0, bindings 0, 1 and 2. */
nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "indices_in");
indices->data.descriptor_set = 0;
indices->data.binding = 0;
nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "dcc_in");
input_dcc->data.descriptor_set = 0;
input_dcc->data.binding = 1;
nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "dcc_out");
output_dcc->data.descriptor_set = 0;
output_dcc->data.binding = 2;
nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform, buf_type, "indices_in");
indices->data.descriptor_set = 0;
indices->data.binding = 0;
nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
input_dcc->data.descriptor_set = 0;
input_dcc->data.binding = 1;
nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
output_dcc->data.descriptor_set = 0;
output_dcc->data.binding = 2;
/* Derefs of the three variables, used as the image operand (src[0]) below. */
nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
/* global_id = wg_id * block_size + invoc_id, with block_size equal to
 * (local_size[0], 0, 0, 0), so only the first component is scaled. */
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
0, 0, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], 0, 0, 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
/* Load a 2-component, 32-bit entry from indices_in at global_id. */
nir_intrinsic_instr *index_vals = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
index_vals->num_components = 2;
index_vals->src[0] = nir_src_for_ssa(indices_ref);
index_vals->src[1] = nir_src_for_ssa(global_id);
index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
nir_builder_instr_insert(&b, &index_vals->instr);
nir_intrinsic_instr *index_vals =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
index_vals->num_components = 2;
index_vals->src[0] = nir_src_for_ssa(indices_ref);
index_vals->src[1] = nir_src_for_ssa(global_id);
index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
nir_builder_instr_insert(&b, &index_vals->instr);
/* Split the loaded pair using masks 1 and 2 (presumably a per-component
 * bitmask, i.e. components 0 and 1 -- confirm against nir_channels()). */
nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
/* Load one component from dcc_in; the coordinate is `src` replicated into
 * all four lanes of a vec4. */
nir_intrinsic_instr *dcc_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
dcc_val->num_components = 1;
dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
nir_builder_instr_insert(&b, &dcc_val->instr);
nir_intrinsic_instr *dcc_val =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
dcc_val->num_components = 1;
dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
nir_builder_instr_insert(&b, &dcc_val->instr);
/* Store the loaded value to dcc_out at coordinate `dst` (replicated the
 * same way); src[3] carries the value to write. */
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
store->num_components = 1;
store->src[0] = nir_src_for_ssa(output_dcc_ref);
store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
store->num_components = 1;
store->src[0] = nir_src_for_ssa(output_dcc_ref);
store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_builder_instr_insert(&b, &store->instr);
return b.shader;
nir_builder_instr_insert(&b, &store->instr);
return b.shader;
}
/*
 * Tears down the DCC-retile meta state of `device`: destroys the pipeline,
 * the pipeline layout and the descriptor set layout stored in
 * device->meta_state.dcc_retile (using the meta-state allocator), then
 * zeroes that struct.
 *
 * Also used as the failure path of radv_device_init_meta_dcc_retile_state(),
 * so some of the handles may not have been created yet when it runs.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
void
radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
struct radv_meta_state *state = &device->meta_state;
radv_DestroyPipeline(radv_device_to_handle(device),
state->dcc_retile.pipeline,
&state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->dcc_retile.p_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->dcc_retile.ds_layout,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
&state->alloc);
/* Reset for next finish. */
memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
/* Reset for next finish. */
memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
}
/*
 * Creates the objects stored in device->meta_state.dcc_retile, in order:
 *   1. a push-descriptor set layout with three storage-texel-buffer
 *      bindings (0, 1, 2), all visible to the compute stage;
 *   2. a pipeline layout using that single set and no push constants;
 *   3. a compute pipeline built from build_dcc_retile_compute_shader().
 *
 * Returns VK_SUCCESS, or the first failing VkResult. On failure,
 * radv_device_finish_meta_dcc_retile_state() is called to undo the partial
 * setup. The NIR shader is released with ralloc_free() on every path.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device *device)
{
VkResult result = VK_SUCCESS;
nir_shader *cs = build_dcc_retile_compute_shader(device);
VkResult result = VK_SUCCESS;
nir_shader *cs = build_dcc_retile_compute_shader(device);
/* Binding numbers here match the descriptor bindings the shader assigns
 * to indices_in (0), dcc_in (1) and dcc_out (2). */
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 3,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 3,
.pBindings = (VkDescriptorSetLayoutBinding[]){
{.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
{.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
{.binding = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&ds_create_info,
&device->meta_state.alloc,
&device->meta_state.dcc_retile.ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
&device->meta_state.alloc,
&device->meta_state.dcc_retile.ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
/* Pipeline layout: the one descriptor set created above, nothing else. */
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
.pushConstantRangeCount = 0,
};
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
.pushConstantRangeCount = 0,
};
result =
radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
&device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);
if (result != VK_SUCCESS)
goto cleanup;
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_create_info,
&device->meta_state.alloc,
&device->meta_state.dcc_retile.p_layout);
if (result != VK_SUCCESS)
goto cleanup;
/* compute shader */
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.dcc_retile.p_layout,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.dcc_retile.p_layout,
};
/* The pipeline is created through the meta-state pipeline cache; no
 * allocator is passed for this call (NULL). */
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info, NULL,
&device->meta_state.dcc_retile.pipeline);
if (result != VK_SUCCESS)
goto cleanup;
result = radv_CreateComputePipelines(
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
&vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline);
if (result != VK_SUCCESS)
goto cleanup;
/* Shared exit: reached on success as well as on failure. */
cleanup:
if (result != VK_SUCCESS)
radv_device_finish_meta_dcc_retile_state(device);
ralloc_free(cs);
return result;
if (result != VK_SUCCESS)
radv_device_finish_meta_dcc_retile_state(device);
ralloc_free(cs);
return result;
}
/*
 * Records into `cmd_buffer` a compute dispatch of the dcc_retile pipeline
 * for `image`.
 *
 * Three texel-buffer views are pushed as descriptors:
 *   binding 0: the image's retile map (image->retile_map),
 *   binding 1: the range [dcc_offset, dcc_offset + dcc_size) of the image BO,
 *   binding 2: the range starting at display_dcc_offset, of size
 *              display_dcc_size, of the same BO.
 * With the shader built by build_dcc_retile_compute_shader(), this copies
 * values from the binding-1 range to the binding-2 range at positions taken
 * from the retile map.
 *
 * Asserts that the image is 2D with one array layer and one mip level.
 * If the pipeline has not been created yet it is created here; on failure
 * the error is stored in cmd_buffer->record_result and nothing is recorded.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
void
radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
{
struct radv_meta_saved_state saved_state;
struct radv_device *device = cmd_buffer->device;
uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
struct radv_meta_saved_state saved_state;
struct radv_device *device = cmd_buffer->device;
uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
assert(image->type == VK_IMAGE_TYPE_2D);
assert(image->info.array_size == 1 && image->info.levels == 1);
assert(image->type == VK_IMAGE_TYPE_2D);
assert(image->info.array_size == 1 && image->info.levels == 1);
struct radv_cmd_state *state = &cmd_buffer->state;
struct radv_cmd_state *state = &cmd_buffer->state;
/* Request the flushes needed before the shader reads and writes the image. */
state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
/* Compile pipelines if not already done so. */
if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return;
}
}
/* Compile pipelines if not already done so. */
if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return;
}
}
/* Save the descriptor and compute-pipeline state; it is restored by
 * radv_meta_restore() after the dispatch. */
radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
RADV_META_SAVE_COMPUTE_PIPELINE);
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.pipeline);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.pipeline);
/* Stack-local radv_buffer wrapping the image's BO, so texel-buffer views
 * can be created over it. */
struct radv_buffer buffer = {
.size = image->size,
.bo = image->bo,
.offset = image->offset
};
struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset = image->offset};
/* Same idea for the retile map BO. */
struct radv_buffer retile_buffer = {
.size = retile_map_size,
.bo = image->retile_map,
.offset = 0
};
struct radv_buffer retile_buffer = {.size = retile_map_size,
.bo = image->retile_map,
.offset = 0};
/* views[0]: retile map, two-component entries whose width (16 or 32 bits
 * per component) is selected by dcc_retile_use_uint16.
 * views[1] and views[2]: byte-wide (R8_UINT) views of the two DCC ranges. */
struct radv_buffer_view views[3];
VkBufferView view_handles[3];
radv_buffer_view_init(views + 0, cmd_buffer->device, &(VkBufferViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&retile_buffer),
.offset = 0,
.range = retile_map_size,
.format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ?
VK_FORMAT_R16G16_UINT : VK_FORMAT_R32G32_UINT,
});
radv_buffer_view_init(views + 1, cmd_buffer->device, &(VkBufferViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
.offset = image->planes[0].surface.dcc_offset,
.range = image->planes[0].surface.dcc_size,
.format = VK_FORMAT_R8_UINT,
});
radv_buffer_view_init(views + 2, cmd_buffer->device, &(VkBufferViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
.offset = image->planes[0].surface.display_dcc_offset,
.range = image->planes[0].surface.u.gfx9.display_dcc_size,
.format = VK_FORMAT_R8_UINT,
});
for (unsigned i = 0; i < 3; ++i)
view_handles[i] = radv_buffer_view_to_handle(&views[i]);
struct radv_buffer_view views[3];
VkBufferView view_handles[3];
radv_buffer_view_init(
views + 0, cmd_buffer->device,
&(VkBufferViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&retile_buffer),
.offset = 0,
.range = retile_map_size,
.format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ? VK_FORMAT_R16G16_UINT
: VK_FORMAT_R32G32_UINT,
});
radv_buffer_view_init(views + 1, cmd_buffer->device,
&(VkBufferViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
.offset = image->planes[0].surface.dcc_offset,
.range = image->planes[0].surface.dcc_size,
.format = VK_FORMAT_R8_UINT,
});
radv_buffer_view_init(views + 2, cmd_buffer->device,
&(VkBufferViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
.offset = image->planes[0].surface.display_dcc_offset,
.range = image->planes[0].surface.u.gfx9.display_dcc_size,
.format = VK_FORMAT_R8_UINT,
});
for (unsigned i = 0; i < 3; ++i)
view_handles[i] = radv_buffer_view_to_handle(&views[i]);
/* View i is written to binding i of set 0. */
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.p_layout,
0, /* set */
3, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[0],
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[1],
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[2],
},
});
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.p_layout, 0, /* set */
3, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[0],
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[1],
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[2],
},
});
/* src+dst pairs count double, so the number of DCC bytes we move is
* actually half of dcc_retile_num_elements. */
radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2, 1, 1);
/* src+dst pairs count double, so the number of DCC bytes we move is
* actually half of dcc_retile_num_elements. */
radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2,
1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
radv_meta_restore(&saved_state, cmd_buffer);
/* Request a compute partial flush plus whatever the shader writes need
 * before later work consumes the result. */
state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
}
+407 -443
View File
@@ -29,542 +29,506 @@
#include "sid.h"
/*
 * Depth operation selector; this is the type of the `op` parameter of
 * create_pipeline().
 *
 * NOTE(review): each enumerator is listed twice below (two layouts of the
 * same list); there are two distinct values, not four.
 */
enum radv_depth_op {
DEPTH_DECOMPRESS,
DEPTH_RESUMMARIZE,
DEPTH_DECOMPRESS,
DEPTH_RESUMMARIZE,
};
/*
 * Selects which aspects a decompress pipeline covers (both, depth only, or
 * stencil only, going by the enumerator names); this is the type of the
 * `decompress` parameter of create_pipeline().
 *
 * NOTE(review): each enumerator is listed twice below (two layouts of the
 * same list); there are three distinct values, not six.
 */
enum radv_depth_decompress {
DECOMPRESS_DEPTH_STENCIL,
DECOMPRESS_DEPTH,
DECOMPRESS_STENCIL,
DECOMPRESS_DEPTH_STENCIL,
DECOMPRESS_DEPTH,
DECOMPRESS_STENCIL,
};
/*
 * Creates a render pass with a single depth/stencil attachment and writes
 * its handle to *pass.
 *
 * The attachment uses VK_FORMAT_D32_SFLOAT_S8_UINT with `samples` samples,
 * LOAD/STORE ops for both depth and stencil, and stays in
 * DEPTH_STENCIL_ATTACHMENT_OPTIMAL layout before and after. The pass has one
 * graphics subpass with no colour attachments and two dependencies
 * (EXTERNAL -> 0 and 0 -> EXTERNAL), both TOP_OF_PIPE -> BOTTOM_OF_PIPE with
 * empty access masks.
 *
 * Returns the VkResult of radv_CreateRenderPass2().
 *
 * NOTE(review): `attachment` is declared without an initializer and the
 * assignments below never set attachment.pNext, so that member is
 * indeterminate when the struct is handed to radv_CreateRenderPass2() --
 * confirm whether the callee reads it and initialize it to NULL if so.
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
static VkResult
create_pass(struct radv_device *device,
uint32_t samples,
VkRenderPass *pass)
create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)
{
VkResult result;
VkDevice device_h = radv_device_to_handle(device);
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
VkAttachmentDescription2 attachment;
VkResult result;
VkDevice device_h = radv_device_to_handle(device);
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
VkAttachmentDescription2 attachment;
/* Field-by-field setup of the single attachment (pNext is not assigned). */
attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
attachment.flags = 0;
attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
attachment.samples = samples;
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
attachment.flags = 0;
attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
attachment.samples = samples;
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
/* The whole create-info tree is built from compound literals, so it only
 * lives for the duration of this call. */
result = radv_CreateRenderPass2(device_h,
&(VkRenderPassCreateInfo2) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
.attachmentCount = 1,
.pAttachments = &attachment,
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription2) {
.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = 0,
.pColorAttachments = NULL,
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
},
.preserveAttachmentCount = 0,
.pPreserveAttachments = NULL,
},
.dependencyCount = 2,
.pDependencies = (VkSubpassDependency2[]) {
{
.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.dstAccessMask = 0,
.dependencyFlags = 0
},
{
.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
.srcSubpass = 0,
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.dstAccessMask = 0,
.dependencyFlags = 0
}
},
},
alloc,
pass);
result = radv_CreateRenderPass2(
device_h,
&(VkRenderPassCreateInfo2){
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
.attachmentCount = 1,
.pAttachments = &attachment,
.subpassCount = 1,
.pSubpasses =
&(VkSubpassDescription2){
.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = 0,
.pColorAttachments = NULL,
.pResolveAttachments = NULL,
.pDepthStencilAttachment =
&(VkAttachmentReference2){
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
},
.preserveAttachmentCount = 0,
.pPreserveAttachments = NULL,
},
.dependencyCount = 2,
.pDependencies =
(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.dstAccessMask = 0,
.dependencyFlags = 0},
{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
.srcSubpass = 0,
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.dstAccessMask = 0,
.dependencyFlags = 0}},
},
alloc, pass);
return result;
return result;
}
/*
 * Creates an empty pipeline layout (no descriptor set layouts, no push
 * constant ranges) with the meta-state allocator and writes its handle to
 * *layout. Returns the VkResult of radv_CreatePipelineLayout().
 *
 * NOTE(review): each statement below appears twice, as two layouts of the
 * same code; the copies do not differ in meaning.
 */
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pSetLayouts = NULL,
.pushConstantRangeCount = 0,
.pPushConstantRanges = NULL,
};
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pSetLayouts = NULL,
.pushConstantRangeCount = 0,
.pPushConstantRanges = NULL,
};
return radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_create_info,
&device->meta_state.alloc,
layout);
return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
&device->meta_state.alloc, layout);
}
/**
 * Create one graphics pipeline used by the depth/stencil decompress and
 * resummarize meta operations.
 *
 * The whole creation runs with device->meta_state.mtx held.  If *pipeline is
 * already non-null the function returns VK_SUCCESS without building anything,
 * so it can be called again for a pipeline that already exists.
 *
 * \param device      Device owning the meta state, allocator and pipeline cache.
 * \param samples     Value stored in rasterizationSamples.
 * \param pass        Render pass the pipeline is created against (subpass 0).
 * \param layout      Pipeline layout to use.
 * \param op          DEPTH_RESUMMARIZE turns on resummarize_enable.
 * \param decompress  Selects which of depth_compress_disable /
 *                    stencil_compress_disable are set.
 * \param pipeline    In/out: destination handle, see above.
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY when a shader could not be
 *         built, or whatever radv_graphics_pipeline_create() returns.
 */
static VkResult
create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,
                VkPipelineLayout layout, enum radv_depth_op op,
                enum radv_depth_decompress decompress, VkPipeline *pipeline)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      /* Already created (possibly by another caller that held the lock). */
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
   nir_shader *fs_module = radv_meta_build_nir_fs_noop();

   if (!vs_module || !fs_module) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   /* Chained into the multisample state below; sample locations are supplied
    * dynamically (VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT), not statically.
    */
   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
      .sampleLocationsEnable = false,
   };

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 2,
      .pStages =
         (VkPipelineShaderStageCreateInfo[]){
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vk_shader_module_handle_from_nir(vs_module),
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = vk_shader_module_handle_from_nir(fs_module),
               .pName = "main",
            },
         },
      .pVertexInputState =
         &(VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 0,
            .vertexAttributeDescriptionCount = 0,
         },
      .pInputAssemblyState =
         &(VkPipelineInputAssemblyStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .primitiveRestartEnable = false,
         },
      .pViewportState =
         &(VkPipelineViewportStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
      .pRasterizationState =
         &(VkPipelineRasterizationStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .depthClampEnable = false,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         },
      .pMultisampleState =
         &(VkPipelineMultisampleStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pNext = &sample_locs_create_info,
            .rasterizationSamples = samples,
            .sampleShadingEnable = false,
            .pSampleMask = NULL,
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
      .pColorBlendState =
         &(VkPipelineColorBlendStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = 0,
            .pAttachments = NULL,
         },
      .pDepthStencilState =
         &(VkPipelineDepthStencilStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .depthTestEnable = false,
            .depthWriteEnable = false,
            .depthBoundsTestEnable = false,
            .stencilTestEnable = false,
         },
      .pDynamicState =
         &(VkPipelineDynamicStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 3,
            .pDynamicStates =
               (VkDynamicState[]){
                  VK_DYNAMIC_STATE_VIEWPORT,
                  VK_DYNAMIC_STATE_SCISSOR,
                  VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
               },
         },
      .layout = layout,
      .renderPass = pass,
      .subpass = 0,
   };

   /* RADV-specific knobs that have no Vulkan equivalent. */
   struct radv_graphics_pipeline_create_info extra = {
      .use_rectlist = true,
      .depth_compress_disable =
         decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_DEPTH,
      .stencil_compress_disable =
         decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_STENCIL,
      .resummarize_enable = op == DEPTH_RESUMMARIZE,
   };

   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
      &extra, &device->meta_state.alloc, pipeline);

cleanup:
   /* The NIR shaders are released on every path, including success. */
   ralloc_free(fs_module);
   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}
/**
 * Destroy every object held in device->meta_state.depth_decomp[]: the render
 * pass, the pipeline layout, all decompress pipelines and the resummarize
 * pipeline of each entry.
 *
 * Also used as the failure path of
 * radv_device_init_meta_depth_decomp_state(), so entries may be only
 * partially populated when this runs.
 */
void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,
                             &state->alloc);
      radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
                                 &state->alloc);

      for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
         radv_DestroyPipeline(radv_device_to_handle(device),
                              state->depth_decomp[i].decompress_pipeline[j], &state->alloc);
      }
      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->depth_decomp[i].resummarize_pipeline, &state->alloc);
   }
}
/**
 * Initialise device->meta_state.depth_decomp[].
 *
 * Entry i is set up for a sample count of (1 << i).  The render pass and the
 * pipeline layout are always created.  The pipelines themselves are created
 * here only when \p on_demand is false; otherwise they are left for
 * radv_get_depth_pipeline() to build on first use.
 *
 * \return VK_SUCCESS, or the first error encountered.  On error everything
 *         created so far is torn down via
 *         radv_device_finish_meta_depth_decomp_state().
 */
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res = VK_SUCCESS;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      uint32_t samples = 1 << i;

      res = create_pass(device, samples, &state->depth_decomp[i].pass);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
      if (res != VK_SUCCESS)
         goto fail;

      /* Pipelines are deferred to first use. */
      if (on_demand)
         continue;

      /* j doubles as the enum radv_depth_decompress value. */
      for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
         res = create_pipeline(device, samples, state->depth_decomp[i].pass,
                               state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS, j,
                               &state->depth_decomp[i].decompress_pipeline[j]);
         if (res != VK_SUCCESS)
            goto fail;
      }

      res = create_pipeline(device, samples, state->depth_decomp[i].pass,
                            state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE, 0, /* unused */
                            &state->depth_decomp[i].resummarize_pipeline);
      if (res != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   radv_device_finish_meta_depth_decomp_state(device);
   return res;
}
static VkPipeline *
radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
enum radv_depth_op op)
radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
{
struct radv_meta_state *state = &cmd_buffer->device->meta_state;
uint32_t samples = image->info.samples;
uint32_t samples_log2 = ffs(samples) - 1;
enum radv_depth_decompress decompress;
VkPipeline *pipeline;
struct radv_meta_state *state = &cmd_buffer->device->meta_state;
uint32_t samples = image->info.samples;
uint32_t samples_log2 = ffs(samples) - 1;
enum radv_depth_decompress decompress;
VkPipeline *pipeline;
if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
decompress = DECOMPRESS_DEPTH;
} else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
decompress = DECOMPRESS_STENCIL;
} else {
decompress = DECOMPRESS_DEPTH_STENCIL;
}
if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
decompress = DECOMPRESS_DEPTH;
} else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
decompress = DECOMPRESS_STENCIL;
} else {
decompress = DECOMPRESS_DEPTH_STENCIL;
}
if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
VkResult ret;
if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
VkResult ret;
for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
ret = create_pipeline(cmd_buffer->device, samples,
state->depth_decomp[samples_log2].pass,
state->depth_decomp[samples_log2].p_layout,
DEPTH_DECOMPRESS,
i,
&state->depth_decomp[samples_log2].decompress_pipeline[i]);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return NULL;
}
}
for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS, i,
&state->depth_decomp[samples_log2].decompress_pipeline[i]);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return NULL;
}
}
ret = create_pipeline(cmd_buffer->device, samples,
state->depth_decomp[samples_log2].pass,
state->depth_decomp[samples_log2].p_layout,
DEPTH_RESUMMARIZE,
0, /* unused */
&state->depth_decomp[samples_log2].resummarize_pipeline);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return NULL;
}
}
ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
0, /* unused */
&state->depth_decomp[samples_log2].resummarize_pipeline);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return NULL;
}
}
switch (op) {
case DEPTH_DECOMPRESS:
pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
break;
case DEPTH_RESUMMARIZE:
pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
break;
default:
unreachable("unknown operation");
}
switch (op) {
case DEPTH_DECOMPRESS:
pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
break;
case DEPTH_RESUMMARIZE:
pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
break;
default:
unreachable("unknown operation");
}
return pipeline;
return pipeline;
}
static void
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *range,
int level, int layer)
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, int level, int layer)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_state *state = &device->meta_state;
uint32_t samples_log2 = ffs(image->info.samples) - 1;
struct radv_image_view iview;
uint32_t width, height;
struct radv_device *device = cmd_buffer->device;
struct radv_meta_state *state = &device->meta_state;
uint32_t samples_log2 = ffs(image->info.samples) - 1;
struct radv_image_view iview;
uint32_t width, height;
width = radv_minify(image->info.width, range->baseMipLevel + level);
height = radv_minify(image->info.height, range->baseMipLevel + level);
width = radv_minify(image->info.width, range->baseMipLevel + level);
height = radv_minify(image->info.height, range->baseMipLevel + level);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
.viewType = radv_meta_get_view_type(image),
.format = image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
.baseMipLevel = range->baseMipLevel + level,
.levelCount = 1,
.baseArrayLayer = range->baseArrayLayer + layer,
.layerCount = 1,
},
}, NULL);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
.viewType = radv_meta_get_view_type(image),
.format = image->vk_format,
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
.baseMipLevel = range->baseMipLevel + level,
.levelCount = 1,
.baseArrayLayer = range->baseArrayLayer + layer,
.layerCount = 1,
},
},
NULL);
VkFramebuffer fb_h;
radv_CreateFramebuffer(
radv_device_to_handle(device),
&(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
.width = width,
.height = height,
.layers = 1},
&cmd_buffer->pool->alloc, &fb_h);
VkFramebuffer fb_h;
radv_CreateFramebuffer(radv_device_to_handle(device),
&(VkFramebufferCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkImageView[]) {
radv_image_view_to_handle(&iview)
},
.width = width,
.height = height,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb_h);
radv_cmd_buffer_begin_render_pass(cmd_buffer,
&(VkRenderPassBeginInfo){
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = state->depth_decomp[samples_log2].pass,
.framebuffer = fb_h,
.renderArea = {.offset =
{
0,
0,
},
.extent =
{
width,
height,
}},
.clearValueCount = 0,
.pClearValues = NULL,
},
NULL);
radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
radv_cmd_buffer_begin_render_pass(cmd_buffer,
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = state->depth_decomp[samples_log2].pass,
.framebuffer = fb_h,
.renderArea = {
.offset = {
0,
0,
},
.extent = {
width,
height,
}
},
.clearValueCount = 0,
.pClearValues = NULL,
}, NULL);
radv_cmd_buffer_set_subpass(cmd_buffer,
&cmd_buffer->state.pass->subpasses[0]);
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
radv_cmd_buffer_end_render_pass(cmd_buffer);
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
radv_cmd_buffer_end_render_pass(cmd_buffer);
radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
&cmd_buffer->pool->alloc);
radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
static void radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs,
enum radv_depth_op op)
static void
radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
{
struct radv_meta_saved_state saved_state;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline *pipeline;
struct radv_meta_saved_state saved_state;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline *pipeline;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_GRAPHICS_PIPELINE |
RADV_META_SAVE_SAMPLE_LOCATIONS |
RADV_META_SAVE_PASS);
radv_meta_save(
&saved_state, cmd_buffer,
RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
pipeline = radv_get_depth_pipeline(cmd_buffer, image,
subresourceRange, op);
pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
*pipeline);
if (sample_locs) {
assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
if (sample_locs) {
assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
/* Set the sample locations specified during explicit or
* automatic layout transitions, otherwise the depth decompress
* pass uses the default HW locations.
*/
radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
.sampleLocationsPerPixel = sample_locs->per_pixel,
.sampleLocationGridSize = sample_locs->grid_size,
.sampleLocationsCount = sample_locs->count,
.pSampleLocations = sample_locs->locations,
});
}
/* Set the sample locations specified during explicit or
* automatic layout transitions, otherwise the depth decompress
* pass uses the default HW locations.
*/
radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
&(VkSampleLocationsInfoEXT){
.sampleLocationsPerPixel = sample_locs->per_pixel,
.sampleLocationGridSize = sample_locs->grid_size,
.sampleLocationsCount = sample_locs->count,
.pSampleLocations = sample_locs->locations,
});
}
for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
/* Do not decompress levels without HTILE. */
if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
continue;
/* Do not decompress levels without HTILE. */
if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
continue;
uint32_t width =
radv_minify(image->info.width,
subresourceRange->baseMipLevel + l);
uint32_t height =
radv_minify(image->info.height,
subresourceRange->baseMipLevel + l);
uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
radv_CmdSetViewport(cmd_buffer_h, 0, 1,
&(VkViewport) {
.x = 0,
.y = 0,
.width = width,
.height = height,
.minDepth = 0.0f,
.maxDepth = 1.0f
});
radv_CmdSetViewport(cmd_buffer_h, 0, 1,
&(VkViewport){.x = 0,
.y = 0,
.width = width,
.height = height,
.minDepth = 0.0f,
.maxDepth = 1.0f});
radv_CmdSetScissor(cmd_buffer_h, 0, 1,
&(VkRect2D) {
.offset = { 0, 0 },
.extent = { width, height },
});
radv_CmdSetScissor(cmd_buffer_h, 0, 1,
&(VkRect2D){
.offset = {0, 0},
.extent = {width, height},
});
for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
radv_process_depth_image_layer(cmd_buffer, image,
subresourceRange, l, s);
}
}
for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
}
}
radv_meta_restore(&saved_state, cmd_buffer);
radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs)
void
radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs)
{
struct radv_barrier_data barrier = {0};
struct radv_barrier_data barrier = {0};
barrier.layout_transitions.depth_stencil_expand = 1;
radv_describe_layout_transition(cmd_buffer, &barrier);
barrier.layout_transitions.depth_stencil_expand = 1;
radv_describe_layout_transition(cmd_buffer, &barrier);
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
sample_locs, DEPTH_DECOMPRESS);
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
}
void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs)
void
radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs)
{
struct radv_barrier_data barrier = {0};
struct radv_barrier_data barrier = {0};
barrier.layout_transitions.depth_stencil_resummarize = 1;
radv_describe_layout_transition(cmd_buffer, &barrier);
barrier.layout_transitions.depth_stencil_resummarize = 1;
radv_describe_layout_transition(cmd_buffer, &barrier);
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
sample_locs, DEPTH_RESUMMARIZE);
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
}
File diff suppressed because it is too large Load Diff
+192 -222
View File
@@ -29,279 +29,249 @@
static nir_shader *
build_fmask_expand_compute_shader(struct radv_device *device, int samples)
{
const struct glsl_type *type =
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type =
glsl_image_type(GLSL_SAMPLER_DIM_MS, true,
GLSL_TYPE_FLOAT);
const struct glsl_type *type =
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
b.shader->info.cs.local_size[0] = 8;
b.shader->info.cs.local_size[1] = 8;
b.shader->info.cs.local_size[2] = 1;
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
b.shader->info.cs.local_size[0] = 8;
b.shader->info.cs.local_size[1] = 8;
b.shader->info.cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
output_img->data.access = ACCESS_NON_READABLE;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
output_img->data.access = ACCESS_NON_READABLE;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size =
nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
nir_ssa_def *tex_coord = nir_vec3(&b, nir_channel(&b, global_id, 0),
nir_channel(&b, global_id, 1),
layer_id);
nir_ssa_def *tex_coord =
nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);
nir_tex_instr *tex_instr[8];
for (uint32_t i = 0; i < samples; i++) {
tex_instr[i] = nir_tex_instr_create(b.shader, 3);
nir_tex_instr *tex_instr[8];
for (uint32_t i = 0; i < samples; i++) {
tex_instr[i] = nir_tex_instr_create(b.shader, 3);
nir_tex_instr *tex = tex_instr[i];
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(tex_coord);
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
tex->src[2].src_type = nir_tex_src_texture_deref;
tex->src[2].src = nir_src_for_ssa(input_img_deref);
tex->dest_type = nir_type_float32;
tex->is_array = true;
tex->coord_components = 3;
nir_tex_instr *tex = tex_instr[i];
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(tex_coord);
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
tex->src[2].src_type = nir_tex_src_texture_deref;
tex->src[2].src = nir_src_for_ssa(input_img_deref);
tex->dest_type = nir_type_float32;
tex->is_array = true;
tex->coord_components = 3;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
}
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
}
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0),
nir_channel(&b, tex_coord, 1),
nir_channel(&b, tex_coord, 2),
nir_imm_int(&b, 0));
nir_ssa_def *img_coord =
nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
nir_channel(&b, tex_coord, 2), nir_imm_int(&b, 0));
for (uint32_t i = 0; i < samples; i++) {
nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
for (uint32_t i = 0; i < samples; i++) {
nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i),
outval, nir_imm_int(&b, 0));
}
nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), outval,
nir_imm_int(&b, 0));
}
return b.shader;
return b.shader;
}
void
radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
const uint32_t samples = image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned layer_count = radv_get_layerCount(image, subresourceRange);
struct radv_image_view iview;
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
const uint32_t samples = image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned layer_count = radv_get_layerCount(image, subresourceRange);
struct radv_image_view iview;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE |
RADV_META_SAVE_DESCRIPTORS);
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline);
cmd_buffer->state.flush_bits |=
radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT, image);
cmd_buffer->state.flush_bits |= radv_dst_access_flush(
cmd_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, image);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
.viewType = radv_meta_get_view_type(image),
.format = vk_format_no_srgb(image->vk_format),
.subresourceRange = {
.aspectMask = subresourceRange->aspectMask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = subresourceRange->baseArrayLayer,
.layerCount = layer_count,
},
}, NULL);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
.viewType = radv_meta_get_view_type(image),
.format = vk_format_no_srgb(image->vk_format),
.subresourceRange =
{
.aspectMask = subresourceRange->aspectMask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = subresourceRange->baseArrayLayer,
.layerCount = layer_count,
},
},
NULL);
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.fmask_expand.p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL
},
}
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL
},
}
}
});
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo =
(VkDescriptorImageInfo[]){
{.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL},
}},
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]){
{.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL},
}}});
radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
radv_meta_restore(&saved_state, cmd_buffer);
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
/* Re-initialize FMASK in fully expanded mode. */
cmd_buffer->state.flush_bits |=
radv_init_fmask(cmd_buffer, image, subresourceRange);
/* Re-initialize FMASK in fully expanded mode. */
cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange);
}
void radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
void
radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
radv_DestroyPipeline(radv_device_to_handle(device),
state->fmask_expand.pipeline[i],
&state->alloc);
}
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->fmask_expand.p_layout,
&state->alloc);
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i],
&state->alloc);
}
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->fmask_expand.ds_layout,
&state->alloc);
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_expand.ds_layout,
&state->alloc);
}
static VkResult
create_fmask_expand_pipeline(struct radv_device *device,
int samples,
VkPipeline *pipeline)
create_fmask_expand_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
struct radv_meta_state *state = &device->meta_state;
VkResult result;
nir_shader *cs = build_fmask_expand_compute_shader(device, samples);;
struct radv_meta_state *state = &device->meta_state;
VkResult result;
nir_shader *cs = build_fmask_expand_compute_shader(device, samples);
;
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = state->fmask_expand.p_layout,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = state->fmask_expand.p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&state->cache),
1, &vk_pipeline_info, NULL,
pipeline);
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&state->cache), 1,
&vk_pipeline_info, NULL, pipeline);
ralloc_free(cs);
return result;
ralloc_free(cs);
return result;
}
VkResult
radv_device_init_meta_fmask_expand_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
VkResult result;
struct radv_meta_state *state = &device->meta_state;
VkResult result;
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]){
{.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
{.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&ds_create_info, &state->alloc,
&state->fmask_expand.ds_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
&state->alloc, &state->fmask_expand.ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo color_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &state->fmask_expand.ds_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = NULL,
};
VkPipelineLayoutCreateInfo color_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &state->fmask_expand.ds_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = NULL,
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&color_create_info, &state->alloc,
&state->fmask_expand.p_layout);
if (result != VK_SUCCESS)
goto fail;
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info,
&state->alloc, &state->fmask_expand.p_layout);
if (result != VK_SUCCESS)
goto fail;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
uint32_t samples = 1 << i;
result = create_fmask_expand_pipeline(device, samples,
&state->fmask_expand.pipeline[i]);
if (result != VK_SUCCESS)
goto fail;
}
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
uint32_t samples = 1 << i;
result = create_fmask_expand_pipeline(device, samples, &state->fmask_expand.pipeline[i]);
if (result != VK_SUCCESS)
goto fail;
}
return VK_SUCCESS;
return VK_SUCCESS;
fail:
radv_device_finish_meta_fmask_expand_state(device);
return result;
radv_device_finish_meta_fmask_expand_state(device);
return result;
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+204 -236
View File
@@ -21,321 +21,289 @@
* IN THE SOFTWARE.
*/
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_format.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_vulkan.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_format.h"
struct ycbcr_state {
nir_builder *builder;
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
nir_deref_instr *tex_deref;
const struct radv_sampler_ycbcr_conversion *conversion;
nir_builder *builder;
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
nir_deref_instr *tex_deref;
const struct radv_sampler_ycbcr_conversion *conversion;
};
static nir_ssa_def *
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
{
nir_builder *b = state->builder;
const struct glsl_type *type = texture->type;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
nir_builder *b = state->builder;
const struct glsl_type *type = texture->type;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
tex->op = nir_texop_txs;
tex->sampler_dim = glsl_get_sampler_dim(type);
tex->is_array = glsl_sampler_type_is_array(type);
tex->is_shadow = glsl_sampler_type_is_shadow(type);
tex->dest_type = nir_type_int32;
tex->op = nir_texop_txs;
tex->sampler_dim = glsl_get_sampler_dim(type);
tex->is_array = glsl_sampler_type_is_array(type);
tex->is_shadow = glsl_sampler_type_is_shadow(type);
tex->dest_type = nir_type_int32;
tex->src[0].src_type = nir_tex_src_texture_deref;
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
tex->src[0].src_type = nir_tex_src_texture_deref;
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
nir_ssa_dest_init(&tex->instr, &tex->dest,
nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, &tex->instr);
return nir_i2f32(b, &tex->dest.ssa);
return nir_i2f32(b, &tex->dest.ssa);
}
static nir_ssa_def *
implicit_downsampled_coord(nir_builder *b,
nir_ssa_def *value,
nir_ssa_def *max_value,
implicit_downsampled_coord(nir_builder *b, nir_ssa_def *value, nir_ssa_def *max_value,
int div_scale)
{
return nir_fadd(b,
value,
nir_fdiv(b,
nir_imm_float(b, 1.0f),
nir_fmul(b,
nir_imm_float(b, div_scale),
max_value)));
return nir_fadd(
b, value,
nir_fdiv(b, nir_imm_float(b, 1.0f), nir_fmul(b, nir_imm_float(b, div_scale), max_value)));
}
static nir_ssa_def *
implicit_downsampled_coords(struct ycbcr_state *state,
nir_ssa_def *old_coords)
implicit_downsampled_coords(struct ycbcr_state *state, nir_ssa_def *old_coords)
{
nir_builder *b = state->builder;
const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
nir_ssa_def *image_size = NULL;
nir_ssa_def *comp[4] = { NULL, };
enum pipe_video_chroma_format chroma_format = pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
const unsigned divisors[2] = {
chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1
};
nir_builder *b = state->builder;
const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
nir_ssa_def *image_size = NULL;
nir_ssa_def *comp[4] = {
NULL,
};
enum pipe_video_chroma_format chroma_format =
pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
const unsigned divisors[2] = {chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1};
for (int c = 0; c < old_coords->num_components; c++) {
if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
if (!image_size)
image_size = get_texture_size(state, state->tex_deref);
for (int c = 0; c < old_coords->num_components; c++) {
if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
if (!image_size)
image_size = get_texture_size(state, state->tex_deref);
comp[c] = implicit_downsampled_coord(b,
nir_channel(b, old_coords, c),
nir_channel(b, image_size, c),
divisors[c]);
} else {
comp[c] = nir_channel(b, old_coords, c);
}
}
comp[c] = implicit_downsampled_coord(b, nir_channel(b, old_coords, c),
nir_channel(b, image_size, c), divisors[c]);
} else {
comp[c] = nir_channel(b, old_coords, c);
}
}
return nir_vec(b, comp, old_coords->num_components);
return nir_vec(b, comp, old_coords->num_components);
}
static nir_ssa_def *
create_plane_tex_instr_implicit(struct ycbcr_state *state,
uint32_t plane)
create_plane_tex_instr_implicit(struct ycbcr_state *state, uint32_t plane)
{
nir_builder *b = state->builder;
nir_tex_instr *old_tex = state->origin_tex;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1);
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
tex->src[i].src_type = old_tex->src[i].src_type;
nir_builder *b = state->builder;
nir_tex_instr *old_tex = state->origin_tex;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
tex->src[i].src_type = old_tex->src[i].src_type;
switch (old_tex->src[i].src_type) {
case nir_tex_src_coord:
if (plane && true/*state->conversion->chroma_reconstruction*/) {
assert(old_tex->src[i].src.is_ssa);
tex->src[i].src =
nir_src_for_ssa(implicit_downsampled_coords(state,
old_tex->src[i].src.ssa));
break;
}
/* fall through */
default:
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
break;
}
}
switch (old_tex->src[i].src_type) {
case nir_tex_src_coord:
if (plane && true /*state->conversion->chroma_reconstruction*/) {
assert(old_tex->src[i].src.is_ssa);
tex->src[i].src =
nir_src_for_ssa(implicit_downsampled_coords(state, old_tex->src[i].src.ssa));
break;
}
/* fall through */
default:
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
break;
}
}
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
tex->sampler_dim = old_tex->sampler_dim;
tex->dest_type = old_tex->dest_type;
tex->is_array = old_tex->is_array;
tex->sampler_dim = old_tex->sampler_dim;
tex->dest_type = old_tex->dest_type;
tex->is_array = old_tex->is_array;
tex->op = old_tex->op;
tex->coord_components = old_tex->coord_components;
tex->is_new_style_shadow = old_tex->is_new_style_shadow;
tex->component = old_tex->component;
tex->op = old_tex->op;
tex->coord_components = old_tex->coord_components;
tex->is_new_style_shadow = old_tex->is_new_style_shadow;
tex->component = old_tex->component;
tex->texture_index = old_tex->texture_index;
tex->sampler_index = old_tex->sampler_index;
tex->texture_index = old_tex->texture_index;
tex->sampler_index = old_tex->sampler_index;
nir_ssa_dest_init(&tex->instr, &tex->dest,
old_tex->dest.ssa.num_components,
nir_dest_bit_size(old_tex->dest), NULL);
nir_builder_instr_insert(b, &tex->instr);
nir_ssa_dest_init(&tex->instr, &tex->dest, old_tex->dest.ssa.num_components,
nir_dest_bit_size(old_tex->dest), NULL);
nir_builder_instr_insert(b, &tex->instr);
return &tex->dest.ssa;
return &tex->dest.ssa;
}
struct swizzle_info {
unsigned plane[4];
unsigned swizzle[4];
unsigned plane[4];
unsigned swizzle[4];
};
static struct swizzle_info
get_plane_swizzles(VkFormat format)
{
int planes = vk_format_get_plane_count(format);
switch (planes) {
case 3:
return (struct swizzle_info) {
{2, 0, 1, 0},
{0, 0, 0, 3}
};
case 2:
return (struct swizzle_info) {
{1, 0, 1, 0},
{1, 0, 0, 3}
};
case 1:
return (struct swizzle_info) {
{0, 0, 0, 0},
{0, 1, 2, 3}
};
default:
unreachable("unhandled plane count for ycbcr swizzling");
}
int planes = vk_format_get_plane_count(format);
switch (planes) {
case 3:
return (struct swizzle_info){{2, 0, 1, 0}, {0, 0, 0, 3}};
case 2:
return (struct swizzle_info){{1, 0, 1, 0}, {1, 0, 0, 3}};
case 1:
return (struct swizzle_info){{0, 0, 0, 0}, {0, 1, 2, 3}};
default:
unreachable("unhandled plane count for ycbcr swizzling");
}
}
static nir_ssa_def *
build_swizzled_components(nir_builder *builder,
VkFormat format,
VkComponentMapping mapping,
build_swizzled_components(nir_builder *builder, VkFormat format, VkComponentMapping mapping,
nir_ssa_def **plane_values)
{
struct swizzle_info plane_swizzle = get_plane_swizzles(format);
enum pipe_swizzle swizzles[4];
nir_ssa_def *values[4];
struct swizzle_info plane_swizzle = get_plane_swizzles(format);
enum pipe_swizzle swizzles[4];
nir_ssa_def *values[4];
vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0,1,2,3}, swizzles);
vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0, 1, 2, 3}, swizzles);
nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
nir_ssa_def *one = nir_imm_float(builder, 1.0f);
nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
nir_ssa_def *one = nir_imm_float(builder, 1.0f);
for (unsigned i = 0; i < 4; ++i) {
switch(swizzles[i]) {
case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_W: {
unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
values[i] = nir_channel(builder,
plane_values[plane_swizzle.plane[channel]],
plane_swizzle.swizzle[channel]);
break;
}
case PIPE_SWIZZLE_0:
values[i] = zero;
break;
case PIPE_SWIZZLE_1:
values[i] = one;
break;
default:
unreachable("unhandled swizzle");
}
}
return nir_vec(builder, values, 4);
for (unsigned i = 0; i < 4; ++i) {
switch (swizzles[i]) {
case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_W: {
unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
values[i] = nir_channel(builder, plane_values[plane_swizzle.plane[channel]],
plane_swizzle.swizzle[channel]);
break;
}
case PIPE_SWIZZLE_0:
values[i] = zero;
break;
case PIPE_SWIZZLE_1:
values[i] = one;
break;
default:
unreachable("unhandled swizzle");
}
}
return nir_vec(builder, values, 4);
}
static bool
try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout,
nir_builder *builder,
try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout, nir_builder *builder,
nir_tex_instr *tex)
{
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
nir_variable *var = nir_deref_instr_get_variable(deref);
const struct radv_descriptor_set_layout *set_layout =
layout->set[var->data.descriptor_set].layout;
const struct radv_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
nir_variable *var = nir_deref_instr_get_variable(deref);
const struct radv_descriptor_set_layout *set_layout =
layout->set[var->data.descriptor_set].layout;
const struct radv_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
if (!ycbcr_samplers)
return false;
if (!ycbcr_samplers)
return false;
/* For the following instructions, we don't apply any change and let the
* instruction apply to the first plane.
*/
if (tex->op == nir_texop_txs ||
tex->op == nir_texop_query_levels ||
tex->op == nir_texop_lod)
return false;
/* For the following instructions, we don't apply any change and let the
* instruction apply to the first plane.
*/
if (tex->op == nir_texop_txs || tex->op == nir_texop_query_levels || tex->op == nir_texop_lod)
return false;
assert(tex->texture_index == 0);
unsigned array_index = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
if (!nir_src_is_const(deref->arr.index))
return false;
array_index = nir_src_as_uint(deref->arr.index);
array_index = MIN2(array_index, binding->array_size - 1);
}
const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
assert(tex->texture_index == 0);
unsigned array_index = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
if (!nir_src_is_const(deref->arr.index))
return false;
array_index = nir_src_as_uint(deref->arr.index);
array_index = MIN2(array_index, binding->array_size - 1);
}
const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
return false;
if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
return false;
struct ycbcr_state state = {
.builder = builder,
.origin_tex = tex,
.tex_deref = deref,
.conversion = ycbcr_sampler,
};
struct ycbcr_state state = {
.builder = builder,
.origin_tex = tex,
.tex_deref = deref,
.conversion = ycbcr_sampler,
};
builder->cursor = nir_before_instr(&tex->instr);
builder->cursor = nir_before_instr(&tex->instr);
VkFormat format = state.conversion->format;
const int plane_count = vk_format_get_plane_count(format);
nir_ssa_def *plane_values[3];
VkFormat format = state.conversion->format;
const int plane_count = vk_format_get_plane_count(format);
nir_ssa_def *plane_values[3];
for (int p = 0; p < plane_count; ++p) {
plane_values[p] = create_plane_tex_instr_implicit(&state, p);
}
for (int p = 0; p < plane_count; ++p) {
plane_values[p] = create_plane_tex_instr_implicit(&state, p);
}
nir_ssa_def *result = build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
VkFormat first_format = vk_format_get_plane_format(format, 0);
uint32_t bits = vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
/* TODO: swizzle and bpcs */
uint32_t bpcs[3] = {bits, bits, bits};
result = nir_convert_ycbcr_to_rgb(builder,
state.conversion->ycbcr_model,
state.conversion->ycbcr_range,
result,
bpcs);
}
nir_ssa_def *result =
build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
VkFormat first_format = vk_format_get_plane_format(format, 0);
uint32_t bits =
vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
/* TODO: swizzle and bpcs */
uint32_t bpcs[3] = {bits, bits, bits};
result = nir_convert_ycbcr_to_rgb(builder, state.conversion->ycbcr_model,
state.conversion->ycbcr_range, result, bpcs);
}
nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
nir_instr_remove(&tex->instr);
nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
nir_instr_remove(&tex->instr);
return true;
return true;
}
bool
radv_nir_lower_ycbcr_textures(nir_shader *shader,
const struct radv_pipeline_layout *layout)
radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout)
{
bool progress = false;
bool progress = false;
nir_foreach_function(function, shader) {
if (!function->impl)
continue;
nir_foreach_function (function, shader) {
if (!function->impl)
continue;
bool function_progress = false;
nir_builder builder;
nir_builder_init(&builder, function->impl);
bool function_progress = false;
nir_builder builder;
nir_builder_init(&builder, function->impl);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_tex)
continue;
nir_foreach_block (block, function->impl) {
nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_tex)
continue;
nir_tex_instr *tex = nir_instr_as_tex(instr);
function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
}
}
nir_tex_instr *tex = nir_instr_as_tex(instr);
function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
}
}
if (function_progress) {
nir_metadata_preserve(function->impl,
nir_metadata_block_index |
nir_metadata_dominance);
}
if (function_progress) {
nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
}
progress |= function_progress;
}
progress |= function_progress;
}
return progress;
return progress;
}
File diff suppressed because it is too large Load Diff
+383 -420
View File
@@ -29,312 +29,291 @@
#include "vk_util.h"
static void
radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
const VkSubpassDependency2 *dep)
radv_render_pass_add_subpass_dep(struct radv_render_pass *pass, const VkSubpassDependency2 *dep)
{
uint32_t src = dep->srcSubpass;
uint32_t dst = dep->dstSubpass;
uint32_t src = dep->srcSubpass;
uint32_t dst = dep->dstSubpass;
/* Ignore subpass self-dependencies as they allow the app to call
* vkCmdPipelineBarrier() inside the render pass and the driver should
* only do the barrier when called, not when starting the render pass.
*/
if (src == dst)
return;
/* Ignore subpass self-dependencies as they allow the app to call
* vkCmdPipelineBarrier() inside the render pass and the driver should
* only do the barrier when called, not when starting the render pass.
*/
if (src == dst)
return;
/* Accumulate all ingoing external dependencies to the first subpass. */
if (src == VK_SUBPASS_EXTERNAL)
dst = 0;
/* Accumulate all ingoing external dependencies to the first subpass. */
if (src == VK_SUBPASS_EXTERNAL)
dst = 0;
if (dst == VK_SUBPASS_EXTERNAL) {
if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
pass->end_barrier.src_stage_mask |= dep->srcStageMask;
pass->end_barrier.src_access_mask |= dep->srcAccessMask;
pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
} else {
if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
}
if (dst == VK_SUBPASS_EXTERNAL) {
if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
pass->end_barrier.src_stage_mask |= dep->srcStageMask;
pass->end_barrier.src_access_mask |= dep->srcAccessMask;
pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
} else {
if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
}
}
static void
radv_render_pass_add_implicit_deps(struct radv_render_pass *pass)
{
/* From the Vulkan 1.0.39 spec:
*
* If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
* first subpass that uses an attachment, then an implicit subpass
* dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
* used in. The implicit subpass dependency only exists if there
* exists an automatic layout transition away from initialLayout.
* The subpass dependency operates as if defined with the
* following parameters:
*
* VkSubpassDependency implicitDependency = {
* .srcSubpass = VK_SUBPASS_EXTERNAL;
* .dstSubpass = firstSubpass; // First subpass attachment is used in
* .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
* .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
* .srcAccessMask = 0;
* .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
* .dependencyFlags = 0;
* };
*
* Similarly, if there is no subpass dependency from the last subpass
* that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
* subpass dependency exists from the last subpass it is used in to
* VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
* if there exists an automatic layout transition into finalLayout.
* The subpass dependency operates as if defined with the following
* parameters:
*
* VkSubpassDependency implicitDependency = {
* .srcSubpass = lastSubpass; // Last subpass attachment is used in
* .dstSubpass = VK_SUBPASS_EXTERNAL;
* .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
* .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
* .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
* .dstAccessMask = 0;
* .dependencyFlags = 0;
* };
*/
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
bool add_ingoing_dep = false, add_outgoing_dep = false;
/* From the Vulkan 1.0.39 spec:
*
* If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
* first subpass that uses an attachment, then an implicit subpass
* dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
* used in. The implicit subpass dependency only exists if there
* exists an automatic layout transition away from initialLayout.
* The subpass dependency operates as if defined with the
* following parameters:
*
* VkSubpassDependency implicitDependency = {
* .srcSubpass = VK_SUBPASS_EXTERNAL;
* .dstSubpass = firstSubpass; // First subpass attachment is used in
* .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
* .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
* .srcAccessMask = 0;
* .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
* .dependencyFlags = 0;
* };
*
* Similarly, if there is no subpass dependency from the last subpass
* that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
* subpass dependency exists from the last subpass it is used in to
* VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
* if there exists an automatic layout transition into finalLayout.
* The subpass dependency operates as if defined with the following
* parameters:
*
* VkSubpassDependency implicitDependency = {
* .srcSubpass = lastSubpass; // Last subpass attachment is used in
* .dstSubpass = VK_SUBPASS_EXTERNAL;
* .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
* .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
* .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
* VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
* .dstAccessMask = 0;
* .dependencyFlags = 0;
* };
*/
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
bool add_ingoing_dep = false, add_outgoing_dep = false;
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att =
&subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
struct radv_render_pass_attachment *pass_att =
&pass->attachments[subpass_att->attachment];
uint32_t initial_layout = pass_att->initial_layout;
uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
uint32_t final_layout = pass_att->final_layout;
uint32_t stencil_final_layout = pass_att->stencil_final_layout;
struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
uint32_t initial_layout = pass_att->initial_layout;
uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
uint32_t final_layout = pass_att->final_layout;
uint32_t stencil_final_layout = pass_att->stencil_final_layout;
/* The implicit subpass dependency only exists if
* there exists an automatic layout transition away
* from initialLayout.
*/
if (pass_att->first_subpass_idx == i &&
!subpass->has_ingoing_dep &&
((subpass_att->layout != initial_layout) ||
(subpass_att->layout != stencil_initial_layout))) {
add_ingoing_dep = true;
}
/* The implicit subpass dependency only exists if
* there exists an automatic layout transition away
* from initialLayout.
*/
if (pass_att->first_subpass_idx == i && !subpass->has_ingoing_dep &&
((subpass_att->layout != initial_layout) ||
(subpass_att->layout != stencil_initial_layout))) {
add_ingoing_dep = true;
}
/* The implicit subpass dependency only exists if
* there exists an automatic layout transition into
* finalLayout.
*/
if (pass_att->last_subpass_idx == i &&
!subpass->has_outgoing_dep &&
((subpass_att->layout != final_layout) ||
(subpass_att->layout != stencil_final_layout))) {
add_outgoing_dep = true;
}
}
/* The implicit subpass dependency only exists if
* there exists an automatic layout transition into
* finalLayout.
*/
if (pass_att->last_subpass_idx == i && !subpass->has_outgoing_dep &&
((subpass_att->layout != final_layout) ||
(subpass_att->layout != stencil_final_layout))) {
add_outgoing_dep = true;
}
}
if (add_ingoing_dep) {
const VkSubpassDependency2KHR implicit_ingoing_dep = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = i, /* first subpass attachment is used in */
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dependencyFlags = 0,
};
if (add_ingoing_dep) {
const VkSubpassDependency2KHR implicit_ingoing_dep = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = i, /* first subpass attachment is used in */
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
.srcAccessMask = 0,
.dstAccessMask =
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dependencyFlags = 0,
};
radv_render_pass_add_subpass_dep(pass,
&implicit_ingoing_dep);
}
radv_render_pass_add_subpass_dep(pass, &implicit_ingoing_dep);
}
if (add_outgoing_dep) {
const VkSubpassDependency2KHR implicit_outgoing_dep = {
.srcSubpass = i, /* last subpass attachment is used in */
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = 0,
.dependencyFlags = 0,
};
if (add_outgoing_dep) {
const VkSubpassDependency2KHR implicit_outgoing_dep = {
.srcSubpass = i, /* last subpass attachment is used in */
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
.srcAccessMask =
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = 0,
.dependencyFlags = 0,
};
radv_render_pass_add_subpass_dep(pass,
&implicit_outgoing_dep);
}
}
radv_render_pass_add_subpass_dep(pass, &implicit_outgoing_dep);
}
}
}
/* Precomputes derived state on a render pass after its subpasses and
 * attachments have been filled in:
 *  - per attachment: first_subpass_idx / last_subpass_idx,
 *  - per subpass: depth_stencil_attachment / ds_resolve_attachment are set to
 *    NULL when they reference VK_ATTACHMENT_UNUSED; has_color_att, the
 *    color/depth/max sample counts and has_color_resolve are computed,
 *  - in_render_loop is set on attachment references that are used both as an
 *    input attachment and as a color or depth/stencil attachment of the same
 *    subpass.
 *
 * NOTE(review): this listing is a rendered diff of a clang-format-only
 * commit, so every statement below appears twice (old and new formatting).
 */
static void
radv_render_pass_compile(struct radv_render_pass *pass)
{
/* Pass 1: mark every referenced attachment as "not used by any subpass yet"
 * by resetting both indices to VK_SUBPASS_EXTERNAL.
 */
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att =
&subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
struct radv_render_pass_attachment *pass_att =
&pass->attachments[subpass_att->attachment];
struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
}
}
pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
}
}
/* Pass 2: walk the subpasses in order and fill in the derived state. */
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
uint32_t color_sample_count = 1, depth_sample_count = 1;
for (uint32_t i = 0; i < pass->subpass_count; i++) {
struct radv_subpass *subpass = &pass->subpasses[i];
uint32_t color_sample_count = 1, depth_sample_count = 1;
/* We don't allow depth_stencil_attachment to be non-NULL and
* be VK_ATTACHMENT_UNUSED. This way something can just check
* for NULL and be guaranteed that they have a valid
* attachment.
*/
if (subpass->depth_stencil_attachment &&
subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
subpass->depth_stencil_attachment = NULL;
/* We don't allow depth_stencil_attachment to be non-NULL and
* be VK_ATTACHMENT_UNUSED. This way something can just check
* for NULL and be guaranteed that they have a valid
* attachment.
*/
if (subpass->depth_stencil_attachment &&
subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
subpass->depth_stencil_attachment = NULL;
if (subpass->ds_resolve_attachment &&
subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
subpass->ds_resolve_attachment = NULL;
if (subpass->ds_resolve_attachment &&
subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
subpass->ds_resolve_attachment = NULL;
/* Subpasses are visited in increasing order, so the first visit lowers
 * first_subpass_idx from VK_SUBPASS_EXTERNAL and the last visit wins for
 * last_subpass_idx.
 */
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att =
&subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
for (uint32_t j = 0; j < subpass->attachment_count; j++) {
struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
struct radv_render_pass_attachment *pass_att =
&pass->attachments[subpass_att->attachment];
struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
if (i < pass_att->first_subpass_idx)
pass_att->first_subpass_idx = i;
pass_att->last_subpass_idx = i;
}
if (i < pass_att->first_subpass_idx)
pass_att->first_subpass_idx = i;
pass_att->last_subpass_idx = i;
}
/* color_sample_count ends up as the sample count of the last used color
 * attachment; it stays 1 when no color attachment is used.
 */
subpass->has_color_att = false;
for (uint32_t j = 0; j < subpass->color_count; j++) {
struct radv_subpass_attachment *subpass_att =
&subpass->color_attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
subpass->has_color_att = false;
for (uint32_t j = 0; j < subpass->color_count; j++) {
struct radv_subpass_attachment *subpass_att = &subpass->color_attachments[j];
if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
subpass->has_color_att = true;
subpass->has_color_att = true;
struct radv_render_pass_attachment *pass_att =
&pass->attachments[subpass_att->attachment];
struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
color_sample_count = pass_att->samples;
}
color_sample_count = pass_att->samples;
}
if (subpass->depth_stencil_attachment) {
const uint32_t a =
subpass->depth_stencil_attachment->attachment;
struct radv_render_pass_attachment *pass_att =
&pass->attachments[a];
depth_sample_count = pass_att->samples;
}
if (subpass->depth_stencil_attachment) {
const uint32_t a = subpass->depth_stencil_attachment->attachment;
struct radv_render_pass_attachment *pass_att = &pass->attachments[a];
depth_sample_count = pass_att->samples;
}
subpass->max_sample_count = MAX2(color_sample_count,
depth_sample_count);
subpass->color_sample_count = color_sample_count;
subpass->depth_sample_count = depth_sample_count;
subpass->max_sample_count = MAX2(color_sample_count, depth_sample_count);
subpass->color_sample_count = color_sample_count;
subpass->depth_sample_count = depth_sample_count;
/* We have to handle resolve attachments specially */
subpass->has_color_resolve = false;
if (subpass->resolve_attachments) {
for (uint32_t j = 0; j < subpass->color_count; j++) {
struct radv_subpass_attachment *resolve_att =
&subpass->resolve_attachments[j];
/* We have to handle resolve attachments specially */
subpass->has_color_resolve = false;
if (subpass->resolve_attachments) {
for (uint32_t j = 0; j < subpass->color_count; j++) {
struct radv_subpass_attachment *resolve_att = &subpass->resolve_attachments[j];
if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
continue;
subpass->has_color_resolve = true;
}
}
subpass->has_color_resolve = true;
}
}
/* Flag both references as in_render_loop when an input attachment is also
 * bound as a color or depth/stencil attachment of this subpass.
 */
for (uint32_t j = 0; j < subpass->input_count; ++j) {
if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
continue;
for (uint32_t j = 0; j < subpass->input_count; ++j) {
if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
continue;
for (uint32_t k = 0; k < subpass->color_count; ++k) {
if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) {
subpass->input_attachments[j].in_render_loop = true;
subpass->color_attachments[k].in_render_loop = true;
}
}
for (uint32_t k = 0; k < subpass->color_count; ++k) {
if (subpass->color_attachments[k].attachment ==
subpass->input_attachments[j].attachment) {
subpass->input_attachments[j].in_render_loop = true;
subpass->color_attachments[k].in_render_loop = true;
}
}
if (subpass->depth_stencil_attachment &&
subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) {
subpass->input_attachments[j].in_render_loop = true;
subpass->depth_stencil_attachment->in_render_loop = true;
}
}
}
if (subpass->depth_stencil_attachment && subpass->depth_stencil_attachment->attachment ==
subpass->input_attachments[j].attachment) {
subpass->input_attachments[j].in_render_loop = true;
subpass->depth_stencil_attachment->in_render_loop = true;
}
}
}
}
/* Releases a render pass: finishes its vk_object_base, then frees the
 * separately allocated subpass_attachments array and finally the pass itself,
 * both through vk_free2 with the device allocator and the caller's pAllocator.
 *
 * The listing shows the old and the new clang-format layout of the same
 * signature and body back to back.
 */
static void
radv_destroy_render_pass(struct radv_device *device,
const VkAllocationCallbacks *pAllocator,
struct radv_render_pass *pass)
radv_destroy_render_pass(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
struct radv_render_pass *pass)
{
vk_object_base_finish(&pass->base);
vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
vk_free2(&device->vk.alloc, pAllocator, pass);
vk_object_base_finish(&pass->base);
vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
vk_free2(&device->vk.alloc, pAllocator, pass);
}
/* Returns how many radv_subpass_attachment slots one subpass description
 * needs: all input attachments, all color attachments, one resolve slot per
 * color attachment when pResolveAttachments is set, plus one slot each for a
 * depth/stencil attachment and a depth/stencil resolve attachment when those
 * pointers are non-NULL.
 *
 * The depth/stencil resolve attachment is looked up in desc->pNext. The two
 * boolean terms contribute 0 or 1 to the sum.
 */
static unsigned
radv_num_subpass_attachments2(const VkSubpassDescription2 *desc)
{
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext,
SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
return desc->inputAttachmentCount +
desc->colorAttachmentCount +
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
(desc->pDepthStencilAttachment != NULL) +
(ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
return desc->inputAttachmentCount + desc->colorAttachmentCount +
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
(desc->pDepthStencilAttachment != NULL) +
(ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
}
/* Returns true only for the two depth-only image layouts
 * (DEPTH_READ_ONLY_OPTIMAL and DEPTH_ATTACHMENT_OPTIMAL); every other layout
 * yields false.
 *
 * The second switch is the reformatted copy of the first one from the diff.
 */
static bool
vk_image_layout_depth_only(VkImageLayout layout)
{
switch (layout) {
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
return true;
default:
return false;
}
switch (layout) {
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
return true;
default:
return false;
}
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2:
@@ -348,16 +327,15 @@ vk_image_layout_depth_only(VkImageLayout layout)
// Returns the stencil layout to use for an attachment reference.
//
// When att_ref->layout is not a depth-only layout it is returned unchanged.
// Otherwise the layout comes from a VkAttachmentReferenceStencilLayoutKHR
// chained in att_ref->pNext; if no such struct is chained the result is
// VK_IMAGE_LAYOUT_UNDEFINED.
static VkImageLayout
stencil_ref_layout(const VkAttachmentReference2 *att_ref)
{
if (!vk_image_layout_depth_only(att_ref->layout))
return att_ref->layout;
if (!vk_image_layout_depth_only(att_ref->layout))
return att_ref->layout;
const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
vk_find_struct_const(att_ref->pNext,
ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
if (!stencil_ref)
return VK_IMAGE_LAYOUT_UNDEFINED;
const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
vk_find_struct_const(att_ref->pNext, ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
if (!stencil_ref)
return VK_IMAGE_LAYOUT_UNDEFINED;
return stencil_ref->stencilLayout;
return stencil_ref->stencilLayout;
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2:
@@ -372,212 +350,197 @@ stencil_ref_layout(const VkAttachmentReference2 *att_ref)
// Returns the initial (final == false) or final (final == true) stencil
// layout of an attachment description.
//
//  - Formats without a stencil aspect yield VK_IMAGE_LAYOUT_UNDEFINED.
//  - If the matching main layout (initialLayout / finalLayout) is not
//    depth-only, that main layout is returned.
//  - Otherwise the value is read from the VkAttachmentDescriptionStencilLayoutKHR
//    chained in att_desc->pNext. Its presence is only checked by assert(), so
//    a missing struct is dereferenced when asserts are compiled out.
static VkImageLayout
stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final)
{
const struct util_format_description *desc = vk_format_description(att_desc->format);
if (!util_format_has_stencil(desc))
return VK_IMAGE_LAYOUT_UNDEFINED;
const struct util_format_description *desc = vk_format_description(att_desc->format);
if (!util_format_has_stencil(desc))
return VK_IMAGE_LAYOUT_UNDEFINED;
const VkImageLayout main_layout =
final ? att_desc->finalLayout : att_desc->initialLayout;
if (!vk_image_layout_depth_only(main_layout))
return main_layout;
const VkImageLayout main_layout = final ? att_desc->finalLayout : att_desc->initialLayout;
if (!vk_image_layout_depth_only(main_layout))
return main_layout;
const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
vk_find_struct_const(att_desc->pNext,
ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
assert(stencil_desc);
return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
vk_find_struct_const(att_desc->pNext, ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
assert(stencil_desc);
return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
}
// Creates a radv_render_pass from a VkRenderPassCreateInfo2.
//
// Steps, in order:
//  1. Allocate one zeroed block holding the pass header, the subpass array
//     and the attachment array (attachments start at attachments_offset).
//  2. Copy format, samples, load ops and (stencil) initial/final layouts of
//     every attachment description.
//  3. Allocate pass->subpass_attachments, sized by summing
//     radv_num_subpass_attachments2() over all subpasses, and carve it up per
//     subpass in the order input, color, resolve, depth/stencil,
//     depth/stencil resolve.
//  4. Register the explicit dependencies and record which subpasses have an
//     explicit dependency from/to VK_SUBPASS_EXTERNAL.
//  5. Run radv_render_pass_compile() and radv_render_pass_add_implicit_deps().
//
// Returns VK_SUCCESS and writes the handle to *pRenderPass, or
// VK_ERROR_OUT_OF_HOST_MEMORY (via vk_error) when either allocation fails.
//
// NOTE(review): this listing is a rendered diff of a formatting-only commit;
// old and new layout of each hunk appear one after the other.
VkResult radv_CreateRenderPass2(
VkDevice _device,
const VkRenderPassCreateInfo2* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkRenderPass* pRenderPass)
VkResult
radv_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkRenderPass *pRenderPass)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_render_pass *pass;
size_t size;
size_t attachments_offset;
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_render_pass *pass;
size_t size;
size_t attachments_offset;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
// Layout of the single allocation: [pass][subpasses...][attachments...].
size = sizeof(*pass);
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
attachments_offset = size;
size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
size = sizeof(*pass);
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
attachments_offset = size;
size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
memset(pass, 0, size);
memset(pass, 0, size);
vk_object_base_init(&device->vk, &pass->base,
VK_OBJECT_TYPE_RENDER_PASS);
vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
pass->attachment_count = pCreateInfo->attachmentCount;
pass->subpass_count = pCreateInfo->subpassCount;
pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *) pass + attachments_offset);
pass->attachment_count = pCreateInfo->attachmentCount;
pass->subpass_count = pCreateInfo->subpassCount;
pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *)pass + attachments_offset);
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
struct radv_render_pass_attachment *att = &pass->attachments[i];
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
struct radv_render_pass_attachment *att = &pass->attachments[i];
att->format = pCreateInfo->pAttachments[i].format;
att->samples = pCreateInfo->pAttachments[i].samples;
att->load_op = pCreateInfo->pAttachments[i].loadOp;
att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
// att->store_op = pCreateInfo->pAttachments[i].storeOp;
// att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
}
uint32_t subpass_attachment_count = 0;
struct radv_subpass_attachment *p;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
subpass_attachment_count +=
radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
}
att->format = pCreateInfo->pAttachments[i].format;
att->samples = pCreateInfo->pAttachments[i].samples;
att->load_op = pCreateInfo->pAttachments[i].loadOp;
att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
// att->store_op = pCreateInfo->pAttachments[i].storeOp;
// att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
}
uint32_t subpass_attachment_count = 0;
struct radv_subpass_attachment *p;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
subpass_attachment_count += radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
}
// One shared array backs the attachment references of all subpasses; on
// allocation failure the partially built pass is destroyed again.
if (subpass_attachment_count) {
pass->subpass_attachments =
vk_alloc2(&device->vk.alloc, pAllocator,
subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass->subpass_attachments == NULL) {
radv_destroy_render_pass(device, pAllocator, pass);
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
} else
pass->subpass_attachments = NULL;
if (subpass_attachment_count) {
pass->subpass_attachments =
vk_alloc2(&device->vk.alloc, pAllocator,
subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass->subpass_attachments == NULL) {
radv_destroy_render_pass(device, pAllocator, pass);
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
} else
pass->subpass_attachments = NULL;
// p is the running cursor into the shared array; each subpass takes a
// contiguous slice starting at subpass->attachments.
p = pass->subpass_attachments;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
struct radv_subpass *subpass = &pass->subpasses[i];
p = pass->subpass_attachments;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
struct radv_subpass *subpass = &pass->subpasses[i];
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
subpass->attachment_count = radv_num_subpass_attachments2(desc);
subpass->attachments = p;
subpass->view_mask = desc->viewMask;
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
subpass->attachment_count = radv_num_subpass_attachments2(desc);
subpass->attachments = p;
subpass->view_mask = desc->viewMask;
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
p += desc->inputAttachmentCount;
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
p += desc->inputAttachmentCount;
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
subpass->input_attachments[j] = (struct radv_subpass_attachment) {
.attachment = desc->pInputAttachments[j].attachment,
.layout = desc->pInputAttachments[j].layout,
.stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
};
}
}
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
subpass->input_attachments[j] = (struct radv_subpass_attachment){
.attachment = desc->pInputAttachments[j].attachment,
.layout = desc->pInputAttachments[j].layout,
.stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
};
}
}
if (desc->colorAttachmentCount > 0) {
subpass->color_attachments = p;
p += desc->colorAttachmentCount;
if (desc->colorAttachmentCount > 0) {
subpass->color_attachments = p;
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->color_attachments[j] = (struct radv_subpass_attachment) {
.attachment = desc->pColorAttachments[j].attachment,
.layout = desc->pColorAttachments[j].layout,
};
}
}
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->color_attachments[j] = (struct radv_subpass_attachment){
.attachment = desc->pColorAttachments[j].attachment,
.layout = desc->pColorAttachments[j].layout,
};
}
}
// Resolve attachments, when present, mirror the color attachment count.
if (desc->pResolveAttachments) {
subpass->resolve_attachments = p;
p += desc->colorAttachmentCount;
if (desc->pResolveAttachments) {
subpass->resolve_attachments = p;
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
.attachment = desc->pResolveAttachments[j].attachment,
.layout = desc->pResolveAttachments[j].layout,
};
}
}
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->resolve_attachments[j] = (struct radv_subpass_attachment){
.attachment = desc->pResolveAttachments[j].attachment,
.layout = desc->pResolveAttachments[j].layout,
};
}
}
if (desc->pDepthStencilAttachment) {
subpass->depth_stencil_attachment = p++;
if (desc->pDepthStencilAttachment) {
subpass->depth_stencil_attachment = p++;
*subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
.attachment = desc->pDepthStencilAttachment->attachment,
.layout = desc->pDepthStencilAttachment->layout,
.stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
};
}
*subpass->depth_stencil_attachment = (struct radv_subpass_attachment){
.attachment = desc->pDepthStencilAttachment->attachment,
.layout = desc->pDepthStencilAttachment->layout,
.stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
};
}
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext,
SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
subpass->ds_resolve_attachment = p++;
if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
subpass->ds_resolve_attachment = p++;
*subpass->ds_resolve_attachment = (struct radv_subpass_attachment) {
.attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
.layout = ds_resolve->pDepthStencilResolveAttachment->layout,
.stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
};
*subpass->ds_resolve_attachment = (struct radv_subpass_attachment){
.attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
.layout = ds_resolve->pDepthStencilResolveAttachment->layout,
.stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
};
subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
}
}
subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
}
}
for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
radv_render_pass_add_subpass_dep(pass,
&pCreateInfo->pDependencies[i]);
radv_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
/* Determine if the subpass has explicit dependencies from/to
* VK_SUBPASS_EXTERNAL.
*/
if (dep->srcSubpass == VK_SUBPASS_EXTERNAL &&
dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
}
/* Determine if the subpass has explicit dependencies from/to
* VK_SUBPASS_EXTERNAL.
*/
if (dep->srcSubpass == VK_SUBPASS_EXTERNAL && dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
}
if (dep->dstSubpass == VK_SUBPASS_EXTERNAL &&
dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
}
}
if (dep->dstSubpass == VK_SUBPASS_EXTERNAL && dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
}
}
// compile() must run before add_implicit_deps(): the latter reads the
// first_subpass_idx / last_subpass_idx values that compile() fills in.
radv_render_pass_compile(pass);
radv_render_pass_compile(pass);
radv_render_pass_add_implicit_deps(pass);
radv_render_pass_add_implicit_deps(pass);
*pRenderPass = radv_render_pass_to_handle(pass);
*pRenderPass = radv_render_pass_to_handle(pass);
return VK_SUCCESS;
return VK_SUCCESS;
}
/* Destroys a render pass handle. A null _pass handle is accepted and makes
 * the call a no-op; otherwise the work is delegated to
 * radv_destroy_render_pass() with the caller's allocator.
 *
 * Old and new clang-format layout of the signature and body are both shown.
 */
void radv_DestroyRenderPass(
VkDevice _device,
VkRenderPass _pass,
const VkAllocationCallbacks* pAllocator)
void
radv_DestroyRenderPass(VkDevice _device, VkRenderPass _pass,
const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
if (!_pass)
return;
if (!_pass)
return;
radv_destroy_render_pass(device, pAllocator, pass);
radv_destroy_render_pass(device, pAllocator, pass);
}
/* Reports the render area granularity as 1x1. The device and renderPass
 * arguments are not read; only *pGranularity is written.
 *
 * Old and new clang-format layout of the signature and body are both shown.
 */
void radv_GetRenderAreaGranularity(
VkDevice device,
VkRenderPass renderPass,
VkExtent2D* pGranularity)
void
radv_GetRenderAreaGranularity(VkDevice device, VkRenderPass renderPass, VkExtent2D *pGranularity)
{
pGranularity->width = 1;
pGranularity->height = 1;
pGranularity->width = 1;
pGranularity->height = 1;
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+1624 -1756
View File
File diff suppressed because it is too large Load Diff
+1370 -1502
View File
File diff suppressed because it is too large Load Diff
+202 -226
View File
@@ -29,337 +29,313 @@
#ifndef RADV_RADEON_WINSYS_H
#define RADV_RADEON_WINSYS_H
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util/u_math.h"
#include "util/u_memory.h"
#include <vulkan/vulkan.h>
#include "amd_family.h"
#include "util/u_memory.h"
#include "util/u_math.h"
struct radeon_info;
struct ac_surf_info;
struct radeon_surf;
/* Buffer-object domains. The values are bit flags so they can be OR-ed
 * together; RADEON_DOMAIN_VRAM_GTT is the combination of the VRAM and GTT
 * bits. Each enumerator is listed twice because the diff shows the old and
 * the re-indented line.
 */
enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
RADEON_DOMAIN_GDS = 8,
RADEON_DOMAIN_OA = 16,
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
RADEON_DOMAIN_GDS = 8,
RADEON_DOMAIN_OA = 16,
};
/* Buffer-object flags. Every enumerator occupies its own bit (1 << 0 through
 * 1 << 10), so flags can be combined with bitwise OR. Each enumerator is
 * listed twice because the diff shows the old and the re-indented line.
 */
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
RADEON_FLAG_VIRTUAL = (1 << 3),
RADEON_FLAG_VA_UNCACHED = (1 << 4),
RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
RADEON_FLAG_READ_ONLY = (1 << 7),
RADEON_FLAG_32BIT = (1 << 8),
RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
RADEON_FLAG_ZERO_VRAM = (1 << 10),
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
RADEON_FLAG_VIRTUAL = (1 << 3),
RADEON_FLAG_VA_UNCACHED = (1 << 4),
RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
RADEON_FLAG_READ_ONLY = (1 << 7),
RADEON_FLAG_32BIT = (1 << 8),
RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
RADEON_FLAG_ZERO_VRAM = (1 << 10),
};
/* Context priority levels. INVALID is -1 and LOW is 0; MEDIUM, HIGH and
 * REALTIME take the following consecutive values (1, 2, 3), so the levels
 * compare in increasing order of priority name. Each enumerator is listed
 * twice because the diff shows the old and the re-indented line.
 */
enum radeon_ctx_priority {
RADEON_CTX_PRIORITY_INVALID = -1,
RADEON_CTX_PRIORITY_LOW = 0,
RADEON_CTX_PRIORITY_MEDIUM,
RADEON_CTX_PRIORITY_HIGH,
RADEON_CTX_PRIORITY_REALTIME,
RADEON_CTX_PRIORITY_INVALID = -1,
RADEON_CTX_PRIORITY_LOW = 0,
RADEON_CTX_PRIORITY_MEDIUM,
RADEON_CTX_PRIORITY_HIGH,
RADEON_CTX_PRIORITY_REALTIME,
};
/* Selectors for the winsys query_value() callback, which takes one of these
 * ids and returns a uint64_t. Values are implicitly numbered from 0. Each
 * enumerator is listed twice because the diff shows the old and the
 * re-indented line.
 */
enum radeon_value_id {
RADEON_ALLOCATED_VRAM,
RADEON_ALLOCATED_VRAM_VIS,
RADEON_ALLOCATED_GTT,
RADEON_TIMESTAMP,
RADEON_NUM_BYTES_MOVED,
RADEON_NUM_EVICTIONS,
RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
RADEON_VRAM_USAGE,
RADEON_VRAM_VIS_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE,
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK,
RADEON_ALLOCATED_VRAM,
RADEON_ALLOCATED_VRAM_VIS,
RADEON_ALLOCATED_GTT,
RADEON_TIMESTAMP,
RADEON_NUM_BYTES_MOVED,
RADEON_NUM_EVICTIONS,
RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
RADEON_VRAM_USAGE,
RADEON_VRAM_VIS_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE,
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK,
};
/* One chunk of a command stream, measured in 32-bit dwords: buf points at the
 * chunk, cdw counts the dwords already used and max_dw is its capacity.
 * Each field is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radeon_cmdbuf {
unsigned cdw; /* Number of used dwords. */
unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The base pointer of the chunk. */
unsigned cdw; /* Number of used dwords. */
unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The base pointer of the chunk. */
};
/* Surface flag word layout and accessors.
 *
 * Two fields are defined here: TYPE (mask 0xFF at shift 0, holding one of the
 * RADEON_SURF_TYPE_* values 0..5) and MODE (mask 0xFF at shift 8).
 *
 * The accessors build the mask/shift names by token-pasting `field`:
 *  - RADEON_SURF_GET(v, field): shift v right, then mask, to extract a field.
 *  - RADEON_SURF_SET(v, field): mask v, then shift left, to position a value.
 *  - RADEON_SURF_CLR(v, field): v with all bits of the field cleared.
 *
 * Each definition appears twice: first in the old layout, then as
 * re-wrapped by clang-format.
 */
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
#define RADEON_SURF_TYPE_2D 1
#define RADEON_SURF_TYPE_3D 2
#define RADEON_SURF_TYPE_CUBEMAP 3
#define RADEON_SURF_TYPE_1D_ARRAY 4
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
#define RADEON_SURF_TYPE_2D 1
#define RADEON_SURF_TYPE_3D 2
#define RADEON_SURF_TYPE_CUBEMAP 3
#define RADEON_SURF_TYPE_1D_ARRAY 4
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
#define RADEON_SURF_GET(v, field) \
(((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT)
#define RADEON_SURF_CLR(v, field) \
((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
/* Tiling layout kinds used by the legacy part of radeon_bo_metadata
 * (its microtile / macrotile fields). LINEAR is 0; TILED, SQUARETILED and
 * UNKNOWN follow as 1, 2 and 3. Each enumerator is listed twice because the
 * diff shows the old and the re-indented line.
 */
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
RADEON_LAYOUT_SQUARETILED,
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
RADEON_LAYOUT_SQUARETILED,
RADEON_LAYOUT_UNKNOWN
RADEON_LAYOUT_UNKNOWN
};
/* Tiling info for display code, DRI sharing, and other data.
 *
 * The union `u` carries one of two descriptions of the surface layout:
 * `legacy` (micro/macro tile modes, bank and pipe parameters, stride) or
 * `gfx9` (swizzle mode plus DCC parameters). Both variants have a `scanout`
 * flag. Nothing in this struct records which variant is active.
 * NOTE(review): presumably the caller picks it from the GPU generation;
 * confirm against the users of this struct.
 *
 * Each member below is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radeon_bo_metadata {
/* Tiling flags describing the texture layout for display code
* and DRI sharing.
*/
union {
struct {
enum radeon_bo_layout microtile;
enum radeon_bo_layout macrotile;
unsigned pipe_config;
unsigned bankw;
unsigned bankh;
unsigned tile_split;
unsigned mtilea;
unsigned num_banks;
unsigned stride;
bool scanout;
} legacy;
/* Tiling flags describing the texture layout for display code
* and DRI sharing.
*/
union {
struct {
enum radeon_bo_layout microtile;
enum radeon_bo_layout macrotile;
unsigned pipe_config;
unsigned bankw;
unsigned bankh;
unsigned tile_split;
unsigned mtilea;
unsigned num_banks;
unsigned stride;
bool scanout;
} legacy;
struct {
/* surface flags */
unsigned swizzle_mode:5;
bool scanout;
uint32_t dcc_offset_256b;
uint32_t dcc_pitch_max;
bool dcc_independent_64b_blocks;
bool dcc_independent_128b_blocks;
unsigned dcc_max_compressed_block_size;
} gfx9;
} u;
struct {
/* surface flags */
unsigned swizzle_mode : 5;
bool scanout;
uint32_t dcc_offset_256b;
uint32_t dcc_pitch_max;
bool dcc_independent_64b_blocks;
bool dcc_independent_128b_blocks;
unsigned dcc_max_compressed_block_size;
} gfx9;
} u;
/* Additional metadata associated with the buffer, in bytes.
* The maximum size is 64 * 4. This is opaque for the winsys & kernel.
* Supported by amdgpu only.
*/
uint32_t size_metadata;
uint32_t metadata[64];
/* Additional metadata associated with the buffer, in bytes.
* The maximum size is 64 * 4. This is opaque for the winsys & kernel.
* Supported by amdgpu only.
*/
uint32_t size_metadata;
uint32_t metadata[64];
};
struct radeon_winsys_ctx;
/* Winsys-independent view of a buffer object: a 64-bit `va`, three boolean
 * properties and the domain the buffer was initially placed in.
 * NOTE(review): `va` is presumably the GPU virtual address of the buffer;
 * confirm against the winsys implementation.
 *
 * Each field is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radeon_winsys_bo {
uint64_t va;
bool is_local;
bool vram_no_cpu_access;
bool use_global_list;
enum radeon_bo_domain initial_domain;
uint64_t va;
bool is_local;
bool vram_no_cpu_access;
bool use_global_list;
enum radeon_bo_domain initial_domain;
};
/* A set of sync objects for one direction (wait or signal) of a submission:
 * counts for binary and timeline syncobjs plus the `syncobj` handle array and
 * the `points` array.
 * NOTE(review): `points` presumably holds the timeline values that pair with
 * the timeline syncobjs; confirm against the submit code.
 *
 * Each field is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radv_winsys_sem_counts {
uint32_t syncobj_count;
uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
uint32_t timeline_syncobj_count;
uint32_t *syncobj;
uint64_t *points;
uint32_t syncobj_count;
uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
uint32_t timeline_syncobj_count;
uint32_t *syncobj;
uint64_t *points;
};
/* Semaphore information for a submission: one radv_winsys_sem_counts set to
 * wait on and one to signal, plus two flags (cs_emit_wait / cs_emit_signal).
 * NOTE(review): the flags presumably say whether the command submission
 * itself should perform the wait / signal; confirm against the submit code.
 *
 * Each field is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radv_winsys_sem_info {
bool cs_emit_signal;
bool cs_emit_wait;
struct radv_winsys_sem_counts wait;
struct radv_winsys_sem_counts signal;
bool cs_emit_signal;
bool cs_emit_wait;
struct radv_winsys_sem_counts wait;
struct radv_winsys_sem_counts signal;
};
/* A list of buffer objects: `bos` is an array of pointers to
 * radeon_winsys_bo and `count` is stored next to it.
 * NOTE(review): `count` is presumably the number of entries in `bos`.
 * Each field is listed twice because the diff shows the old and the
 * re-indented line.
 */
struct radv_winsys_bo_list {
struct radeon_winsys_bo **bos;
unsigned count;
struct radeon_winsys_bo **bos;
unsigned count;
};
/* Kernel effectively allows 0-31. This sets some priorities for fixed
 * functionality buffers */
enum {
   RADV_BO_PRIORITY_APPLICATION_MAX = 28,

   /* virtual buffers have 0 priority since the priority is not used. */
   RADV_BO_PRIORITY_VIRTUAL = 0,

   RADV_BO_PRIORITY_METADATA = 10,
   /* This should be considerably lower than most of the stuff below,
    * but how much lower is hard to say since we don't know application
    * assignments. Put it pretty high since it is GTT anyway. */
   RADV_BO_PRIORITY_QUERY_POOL = 29,

   RADV_BO_PRIORITY_DESCRIPTOR = 30,
   RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
   RADV_BO_PRIORITY_FENCE = 30,
   RADV_BO_PRIORITY_SHADER = 31,
   RADV_BO_PRIORITY_SCRATCH = 31,
   RADV_BO_PRIORITY_CS = 31,
};
/* Table of function pointers through which callers reach the winsys
 * implementation (e.g. radv_cs_add_buffer() calls ws->cs_add_buffer).
 * Entries are grouped by the object they act on: the winsys itself, buffers,
 * contexts, command streams, debug dumps, surfaces and sync objects. */
struct radeon_winsys {
   void (*destroy)(struct radeon_winsys *ws);

   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);

   uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);

   bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
                          uint32_t *out);

   const char *(*get_chip_name)(struct radeon_winsys *ws);

   /* Buffer objects. */
   struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws, uint64_t size,
                                             unsigned alignment, enum radeon_bo_domain domain,
                                             enum radeon_bo_flag flags, unsigned priority);

   void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
   void *(*buffer_map)(struct radeon_winsys_bo *bo);

   struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer,
                                               uint64_t size, unsigned priority);

   struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority,
                                              uint64_t *alloc_size);

   bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd);

   bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
                                    enum radeon_bo_domain *domains, enum radeon_bo_flag *flags);

   void (*buffer_unmap)(struct radeon_winsys_bo *bo);

   void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
                               struct radeon_bo_metadata *md);
   void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
                               struct radeon_bo_metadata *md);

   VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent,
                                   uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo,
                                   uint64_t bo_offset);

   VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
                                    bool resident);

   /* Contexts. */
   VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority,
                          struct radeon_winsys_ctx **ctx);
   void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

   bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, int ring_index);

   /* Command streams. */
   struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);

   void (*cs_destroy)(struct radeon_cmdbuf *cs);

   void (*cs_reset)(struct radeon_cmdbuf *cs);

   VkResult (*cs_finalize)(struct radeon_cmdbuf *cs);

   void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size);

   VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, int queue_index,
                         struct radeon_cmdbuf **cs_array, unsigned cs_count,
                         struct radeon_cmdbuf *initial_preamble_cs,
                         struct radeon_cmdbuf *continue_preamble_cs,
                         struct radv_winsys_sem_info *sem_info, bool can_patch);

   void (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo);

   void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child);

   /* Debug dumps. */
   void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count);

   void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);

   void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);

   /* Surfaces. */
   int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info,
                       struct radeon_surf *surf);

   /* Sync objects. */
   int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled, uint32_t *handle);
   void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);

   void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
   void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t point);
   VkResult (*query_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t *point);
   bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
                        bool wait_all, uint64_t timeout);
   bool (*wait_timeline_syncobj)(struct radeon_winsys *ws, const uint32_t *handles,
                                 const uint64_t *points, uint32_t handle_count, bool wait_all,
                                 bool available, uint64_t timeout);

   int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
   int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);

   int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);

   /* Note that this, unlike the normal import, uses an existing syncobj. */
   int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);
};
static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
static inline void
radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
{
cs->buf[cs->cdw++] = value;
cs->buf[cs->cdw++] = value;
}
static inline void radeon_emit_array(struct radeon_cmdbuf *cs,
const uint32_t *values, unsigned count)
static inline void
radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, unsigned count)
{
memcpy(cs->buf + cs->cdw, values, count * 4);
cs->cdw += count;
memcpy(cs->buf + cs->cdw, values, count * 4);
cs->cdw += count;
}
static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo)
static inline uint64_t
radv_buffer_get_va(struct radeon_winsys_bo *bo)
{
return bo->va;
return bo->va;
}
static inline void radv_cs_add_buffer(struct radeon_winsys *ws,
struct radeon_cmdbuf *cs,
struct radeon_winsys_bo *bo)
static inline void
radv_cs_add_buffer(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo)
{
if (bo->use_global_list)
return;
if (bo->use_global_list)
return;
ws->cs_add_buffer(cs, bo);
ws->cs_add_buffer(cs, bo);
}
/* Declaration only; the definition is not in this part of the file.
 * Presumably selects the BO domain used for command buffers from the GPU info
 * and the perftest flags - confirm against the definition. */
enum radeon_bo_domain radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest);
+1385 -1488
View File
File diff suppressed because it is too large Load Diff
+387 -435
View File
@@ -35,570 +35,522 @@
#include "radv_constants.h"
#include "nir/nir.h"
#include "vulkan/vulkan.h"
#include "vulkan/util/vk_object.h"
#include "vulkan/util/vk_shader_module.h"
#include "aco_interface.h"
#include "vulkan/vulkan.h"
/* Array size used for per-vertex-attribute data (radv_shader_info's
 * vs.input_usage_mask): MAX2 of VERT_ATTRIB_MAX and
 * VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS. MAX2 is defined elsewhere;
 * presumably the two-argument maximum - confirm. */
#define RADV_VERT_ATTRIB_MAX MAX2(VERT_ATTRIB_MAX, VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
/* Forward declarations; the declarations below use these types through pointers. */
struct radv_device;
struct radv_pipeline;
struct radv_pipeline_cache;
struct radv_pipeline_key;
/* One-bit flags shared by the VS and TES variant keys (it is the `out` member
 * of both, and radv_shader_variant_key::vs_common_out aliases it). */
struct radv_vs_out_key {
   uint32_t as_es : 1;
   uint32_t as_ls : 1;
   uint32_t as_ngg : 1;
   uint32_t as_ngg_passthrough : 1;
   uint32_t export_prim_id : 1;
   uint32_t export_layer_id : 1;
   uint32_t export_clip_dists : 1;
   uint32_t export_viewport_index : 1;
};
/* Vertex-shader variant key. `out` must stay the first member: it is the common
 * prefix that radv_shader_variant_key::vs_common_out relies on. */
struct radv_vs_variant_key {
   struct radv_vs_out_key out;

   /* Per-attribute arrays, MAX_VERTEX_ATTRIBS entries each. */
   uint32_t instance_rate_inputs;
   uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
   uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
   uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];

   /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW,
    * so we may need to fix it up. */
   enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];

   /* For some formats the channels have to be shuffled. */
   uint32_t post_shuffle;

   /* Output primitive type. */
   uint8_t outprim;
};
/* Tessellation-evaluation variant key; consists only of the output key shared with the VS key. */
struct radv_tes_variant_key {
   struct radv_vs_out_key out;
};
/* Tessellation-control variant key: embeds a full VS key plus two TCS-specific values. */
struct radv_tcs_variant_key {
   struct radv_vs_variant_key vs_key;
   unsigned primitive_mode;
   unsigned input_vertices;
};
/* Fragment-shader variant key. */
struct radv_fs_variant_key {
   uint32_t col_format;
   uint8_t log2_ps_iter_samples;
   uint8_t num_samples;
   uint32_t is_int8;
   uint32_t is_int10;
};
/* Compute-shader variant key. */
struct radv_cs_variant_key {
   uint8_t subgroup_size;
};
/* Variant key for any shader stage: an anonymous union of the per-stage keys
 * followed by a flag that sits outside the union. */
struct radv_shader_variant_key {
   union {
      struct radv_vs_variant_key vs;
      struct radv_fs_variant_key fs;
      struct radv_tes_variant_key tes;
      struct radv_tcs_variant_key tcs;
      struct radv_cs_variant_key cs;

      /* A common prefix of the vs and tes keys. */
      struct radv_vs_out_key vs_common_out;
   };

   bool has_multiview_view_index;
};
/* Level passed to the radv_nir_compiler_options debug callback. */
enum radv_compiler_debug_level {
   RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
   RADV_COMPILER_DEBUG_LEVEL_ERROR,
};
/* Options record for compiling a shader: pipeline layout and variant key,
 * a set of boolean switches, target hardware identification, and an optional
 * debug-message callback. */
struct radv_nir_compiler_options {
   struct radv_pipeline_layout *layout;
   struct radv_shader_variant_key key;
   bool explicit_scratch_args;
   bool clamp_shadow_reference;
   bool robust_buffer_access;
   bool robust_buffer_access2;
   bool adjust_frag_coord_z;
   bool dump_shader;
   bool dump_preoptir;
   bool record_ir;
   bool record_stats;
   bool check_ir;
   bool has_ls_vgpr_init_bug;
   bool has_image_load_dcc_bug;
   bool use_ngg_streamout;
   bool enable_mrt_output_nan_fixup;
   bool disable_optimizations; /* only used by ACO */
   bool wgp_mode;
   enum radeon_family family;
   enum chip_class chip_class;
   const struct radeon_info *info;
   uint32_t tess_offchip_block_dw_size;
   uint32_t address32_hi;
   uint8_t force_vrs_rates;

   /* Message callback; `private_data` is the first argument `func` takes. */
   struct {
      void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
      void *private_data;
   } debug;
};
/* Indices into radv_userdata_locations::shader_data.
 *
 * Entries 0..6 are common; per-stage entries restart at AC_UD_SHADER_START, so
 * the VS and CS ranges deliberately share values (AC_UD_VS_VERTEX_BUFFERS and
 * AC_UD_CS_GRID_SIZE are both 7).
 *
 * NOTE(review): AC_UD_MAX_UD is defined as AC_UD_TCS_MAX_UD (10), which is one
 * less than AC_UD_TES_MAX_UD (11). The largest named slot index here is 8, so
 * an array of AC_UD_MAX_UD entries still covers every named slot. */
enum radv_ud_index {
   AC_UD_SCRATCH_RING_OFFSETS = 0,
   AC_UD_PUSH_CONSTANTS = 1,
   AC_UD_INLINE_PUSH_CONSTANTS = 2,
   AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
   AC_UD_VIEW_INDEX = 4,
   AC_UD_STREAMOUT_BUFFERS = 5,
   AC_UD_NGG_GS_STATE = 6,
   AC_UD_SHADER_START = 7,
   AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
   AC_UD_VS_BASE_VERTEX_START_INSTANCE,
   AC_UD_VS_MAX_UD,
   AC_UD_PS_MAX_UD,
   AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
   AC_UD_CS_MAX_UD,
   AC_UD_GS_MAX_UD,
   AC_UD_TCS_MAX_UD,
   AC_UD_TES_MAX_UD,
   AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
};
/* One stream-output entry; radv_streamout_info holds an array of these. */
struct radv_stream_output {
   uint8_t location;
   uint8_t buffer;
   uint16_t offset;
   uint8_t component_mask;
   uint8_t stream;
};
/* Stream-output description: up to MAX_SO_OUTPUTS outputs and one stride per
 * buffer (MAX_SO_BUFFERS entries). */
struct radv_streamout_info {
   uint16_t num_outputs;
   struct radv_stream_output outputs[MAX_SO_OUTPUTS];
   uint16_t strides[MAX_SO_BUFFERS];
   uint32_t enabled_stream_buffers_mask;
};
/* Location of one user-data item: an SGPR index (signed) and an SGPR count. */
struct radv_userdata_info {
   int8_t sgpr_idx;
   uint8_t num_sgprs;
};
/* User-data locations for a shader: one entry per descriptor set (MAX_SETS)
 * and one per enum radv_ud_index slot (AC_UD_MAX_UD). */
struct radv_userdata_locations {
   struct radv_userdata_info descriptor_sets[MAX_SETS];
   struct radv_userdata_info shader_data[AC_UD_MAX_UD];
   uint32_t descriptor_sets_enabled;
};
/* Output description embedded as `outinfo` in the vs and tes parts of radv_shader_info. */
struct radv_vs_output_info {
   uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
   uint8_t clip_dist_mask;
   uint8_t cull_dist_mask;
   uint8_t param_exports;
   bool writes_pointsize;
   bool writes_layer;
   bool writes_viewport_index;
   bool writes_primitive_shading_rate;
   bool export_prim_id;
   unsigned pos_exports;
};
/* Output description embedded as `es_info` in the vs and tes parts of radv_shader_info. */
struct radv_es_output_info {
   uint32_t esgs_itemsize;
};
/* Four 32-bit values stored as radv_shader_info::gs_ring_info. */
struct gfx9_gs_info {
   uint32_t vgt_gs_onchip_cntl;
   uint32_t vgt_gs_max_prims_per_subgroup;
   uint32_t vgt_esgs_ring_itemsize;
   uint32_t lds_size;
};
/* Values stored as radv_shader_info::ngg_info. */
struct gfx10_ngg_info {
   uint16_t ngg_emit_size; /* in dwords */
   uint32_t hw_max_esverts;
   uint32_t max_gsprims;
   uint32_t max_out_verts;
   uint32_t prim_amp_factor;
   uint32_t vgt_esgs_ring_itemsize;
   uint32_t esgs_ring_size;
   bool max_vert_out_per_gs_instance;
};
/* Per-shader information record. Stage-independent fields come first, followed
 * by one sub-struct per stage (vs, gs, tes, ps, cs, tcs - separate members, not
 * a union) and finally the stream-output, GS-ring and NGG records. */
struct radv_shader_info {
   bool loads_push_constants;
   bool loads_dynamic_offsets;
   uint8_t min_push_constant_used;
   uint8_t max_push_constant_used;
   bool has_only_32bit_push_constants;
   bool has_indirect_push_constants;
   uint8_t num_inline_push_consts;
   uint8_t base_inline_push_consts;
   uint32_t desc_set_used_mask;
   bool needs_multiview_view_index;
   bool uses_invocation_id;
   bool uses_prim_id;
   uint8_t wave_size;
   uint8_t ballot_bit_size;
   struct radv_userdata_locations user_sgprs_locs;
   unsigned num_user_sgprs;
   unsigned num_input_sgprs;
   unsigned num_input_vgprs;
   unsigned private_mem_vgprs;
   bool need_indirect_descriptor_sets;
   bool is_ngg;
   bool is_ngg_passthrough;
   uint32_t num_tess_patches;

   /* Vertex shader. */
   struct {
      uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
      uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
      bool has_vertex_buffers; /* needs vertex buffers and base/start */
      bool needs_draw_id;
      bool needs_instance_id;
      struct radv_vs_output_info outinfo;
      struct radv_es_output_info es_info;
      bool as_es;
      bool as_ls;
      bool export_prim_id;
      bool tcs_in_out_eq;
      uint64_t tcs_temp_only_input_mask;
      uint8_t num_linked_outputs;
      bool needs_base_instance;
   } vs;

   /* Geometry shader. */
   struct {
      uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
      uint8_t num_stream_output_components[4];
      uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
      uint8_t max_stream;
      unsigned gsvs_vertex_size;
      unsigned max_gsvs_emit_size;
      unsigned vertices_in;
      unsigned vertices_out;
      unsigned output_prim;
      unsigned invocations;
      unsigned es_type; /* GFX9: VS or TES */
      uint8_t num_linked_inputs;
   } gs;

   /* Tessellation evaluation shader. */
   struct {
      uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
      struct radv_vs_output_info outinfo;
      struct radv_es_output_info es_info;
      bool as_es;
      unsigned primitive_mode;
      enum gl_tess_spacing spacing;
      bool ccw;
      bool point_mode;
      bool export_prim_id;
      uint8_t num_linked_inputs;
      uint8_t num_linked_patch_inputs;
      uint8_t num_linked_outputs;
   } tes;

   /* Pixel (fragment) shader. */
   struct {
      bool uses_sample_shading;
      bool needs_sample_positions;
      bool writes_memory;
      bool writes_z;
      bool writes_stencil;
      bool writes_sample_mask;
      bool has_pcoord;
      bool prim_id_input;
      bool layer_input;
      bool viewport_index_input;
      uint8_t num_input_clips_culls;
      uint32_t input_mask;
      uint32_t flat_shaded_mask;
      uint32_t explicit_shaded_mask;
      uint32_t float16_shaded_mask;
      uint32_t num_interp;
      bool can_discard;
      bool early_fragment_test;
      bool post_depth_coverage;
      bool reads_sample_mask_in;
      uint8_t depth_layout;
      bool uses_persp_or_linear_interp;
      bool allow_flat_shading;
   } ps;

   /* Compute shader. */
   struct {
      bool uses_grid_size;
      bool uses_block_id[3];
      bool uses_thread_id[3];
      bool uses_local_invocation_idx;
      unsigned block_size[3];
   } cs;

   /* Tessellation control shader. */
   struct {
      uint64_t tes_inputs_read;
      uint64_t tes_patch_inputs_read;
      unsigned tcs_vertices_out;
      uint32_t num_lds_blocks;
      uint8_t num_linked_inputs;
      uint8_t num_linked_outputs;
      uint8_t num_linked_patch_outputs;
      bool tes_reads_tess_factors : 1;
   } tcs;

   struct radv_streamout_info so;

   struct gfx9_gs_info gs_ring_info;
   struct gfx10_ngg_info ngg_info;

   unsigned float_controls_mode;
};
/* Discriminator stored in radv_shader_binary::type; the two values correspond
 * to struct radv_shader_binary_legacy and struct radv_shader_binary_rtld. */
enum radv_shader_binary_type { RADV_BINARY_TYPE_LEGACY, RADV_BINARY_TYPE_RTLD };
/* Common header of a shader binary; it is the first member (`base`) of both
 * radv_shader_binary_legacy and radv_shader_binary_rtld. */
struct radv_shader_binary {
   enum radv_shader_binary_type type;
   gl_shader_stage stage;
   bool is_gs_copy_shader;

   struct radv_shader_info info;

   /* Self-referential size so we avoid consistency issues. */
   uint32_t total_size;
};
/* Legacy binary layout: the common header, the shader config, a set of section
 * sizes, and the variable-length payload in `data`. */
struct radv_shader_binary_legacy {
   struct radv_shader_binary base;
   struct ac_shader_config config;
   unsigned code_size;
   unsigned exec_size;
   unsigned ir_size;
   unsigned disasm_size;
   unsigned stats_size;

   /* data has size of stats_size + code_size + ir_size + disasm_size + 2,
    * where the +2 is for 0 of the ir strings. */
   uint8_t data[0];
};
/* RTLD binary layout: the common header, two section sizes, and the
 * variable-length payload in `data`. */
struct radv_shader_binary_rtld {
   struct radv_shader_binary base;
   unsigned elf_size;
   unsigned llvm_ir_size;
   uint8_t data[0];
};
/* A compiled shader: reference count, the buffer object and offset where it
 * lives, its config/code/info, debug-only strings, and a list link. */
struct radv_shader_variant {
   uint32_t ref_count;

   struct radeon_winsys_bo *bo;
   uint64_t bo_offset;
   struct ac_shader_config config;
   uint8_t *code_ptr;
   uint32_t code_size;
   uint32_t exec_size;
   struct radv_shader_info info;

   /* debug only */
   char *spirv;
   uint32_t spirv_size;
   char *nir_string;
   char *disasm_string;
   char *ir_string;
   uint32_t *statistics;

   struct list_head slab_list;
};
/* A slab: two list heads (`slabs`, `shaders`), the backing buffer object, its
 * size, and a char pointer `ptr`. */
struct radv_shader_slab {
   struct list_head slabs;
   struct list_head shaders;
   struct radeon_winsys_bo *bo;
   uint64_t size;
   char *ptr;
};
/* Declarations of the shader-related entry points; the definitions are not in
 * this part of the file. */

/* NIR processing. */
void radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
                       bool optimize_conservatively, bool allow_copies);
bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout);

nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
                                       const char *entrypoint_name, gl_shader_stage stage,
                                       const VkSpecializationInfo *spec_info,
                                       const VkPipelineCreateFlags flags,
                                       const struct radv_pipeline_layout *layout,
                                       const struct radv_pipeline_key *key);

void radv_destroy_shader_slabs(struct radv_device *device);

VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
                             struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
                             const VkPipelineShaderStageCreateInfo **pStages,
                             const VkPipelineCreateFlags flags,
                             VkPipelineCreationFeedbackEXT *pipeline_feedback,
                             VkPipelineCreationFeedbackEXT **stage_feedbacks);

/* Shader variant creation and destruction. */
struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
                                                       const struct radv_shader_binary *binary,
                                                       bool keep_shader_info);
struct radv_shader_variant *radv_shader_variant_compile(
   struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders,
   int shader_count, struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key,
   struct radv_shader_info *info, bool keep_shader_info, bool keep_statistic_info,
   bool disable_optimizations, struct radv_shader_binary **binary_out);

struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
                           struct radv_shader_info *info, struct radv_shader_binary **binary_out,
                           bool multiview, bool keep_shader_info, bool keep_statistic_info,
                           bool disable_optimizations);

struct radv_shader_variant *radv_create_trap_handler_shader(struct radv_device *device);

void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant);

/* Queries. */
unsigned radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
                            gl_shader_stage stage);

unsigned radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
                                     const unsigned *sizes);

const char *radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage);

/* Debug dumps. */
bool radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
                          bool is_gs_copy_shader);

bool radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module);

VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
                                gl_shader_stage stage, FILE *output);
static inline unsigned
calculate_tess_lds_size(enum chip_class chip_class,
unsigned tcs_num_input_vertices,
unsigned tcs_num_output_vertices,
unsigned tcs_num_inputs,
unsigned tcs_num_patches,
unsigned tcs_num_outputs,
unsigned tcs_num_patch_outputs)
calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_vertices,
unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
unsigned tcs_num_patches, unsigned tcs_num_outputs,
unsigned tcs_num_patch_outputs)
{
unsigned input_vertex_size = tcs_num_inputs * 16;
unsigned output_vertex_size = tcs_num_outputs * 16;
unsigned input_vertex_size = tcs_num_inputs * 16;
unsigned output_vertex_size = tcs_num_outputs * 16;
unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
if (chip_class >= GFX7) {
assert(lds_size <= 65536);
lds_size = align(lds_size, 512) / 512;
} else {
assert(lds_size <= 32768);
lds_size = align(lds_size, 256) / 256;
}
if (chip_class >= GFX7) {
assert(lds_size <= 65536);
lds_size = align(lds_size, 512) / 512;
} else {
assert(lds_size <= 32768);
lds_size = align(lds_size, 256) / 256;
}
return lds_size;
return lds_size;
}
static inline unsigned
get_tcs_num_patches(unsigned tcs_num_input_vertices,
unsigned tcs_num_output_vertices,
unsigned tcs_num_inputs,
unsigned tcs_num_outputs,
unsigned tcs_num_patch_outputs,
unsigned tess_offchip_block_dw_size,
enum chip_class chip_class,
enum radeon_family family)
get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
unsigned tcs_num_inputs, unsigned tcs_num_outputs,
unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
enum chip_class chip_class, enum radeon_family family)
{
uint32_t input_vertex_size = tcs_num_inputs * 16;
uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
uint32_t output_vertex_size = tcs_num_outputs * 16;
uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
uint32_t input_vertex_size = tcs_num_inputs * 16;
uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
uint32_t output_vertex_size = tcs_num_outputs * 16;
uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
/* Ensure that we only need one wave per SIMD so we don't need to check
* resource usage. Also ensures that the number of tcs in and out
* vertices per threadgroup are at most 256.
*/
unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
*/
unsigned hardware_lds_size = 32768;
/* Ensure that we only need one wave per SIMD so we don't need to check
* resource usage. Also ensures that the number of tcs in and out
* vertices per threadgroup are at most 256.
*/
unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
*/
unsigned hardware_lds_size = 32768;
/* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
* threadgroup, even though there is more than 32 KiB LDS.
*
* Test: dEQP-VK.tessellation.shader_input_output.barrier
*/
if (chip_class >= GFX7 && family != CHIP_STONEY)
hardware_lds_size = 65536;
/* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
* threadgroup, even though there is more than 32 KiB LDS.
*
* Test: dEQP-VK.tessellation.shader_input_output.barrier
*/
if (chip_class >= GFX7 && family != CHIP_STONEY)
hardware_lds_size = 65536;
if (input_patch_size + output_patch_size)
num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
/* Make sure the output data fits in the offchip buffer */
if (output_patch_size)
num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
/* Not necessary for correctness, but improves performance. The
* specific value is taken from the proprietary driver.
*/
num_patches = MIN2(num_patches, 40);
if (input_patch_size + output_patch_size)
num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
/* Make sure the output data fits in the offchip buffer */
if (output_patch_size)
num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
/* Not necessary for correctness, but improves performance. The
* specific value is taken from the proprietary driver.
*/
num_patches = MIN2(num_patches, 40);
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
if (chip_class == GFX6) {
unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
num_patches = MIN2(num_patches, one_wave);
}
return num_patches;
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
if (chip_class == GFX6) {
unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
num_patches = MIN2(num_patches, one_wave);
}
return num_patches;
}
/* NIR I/O lowering entry points. */
void radv_lower_io(struct radv_device *device, nir_shader *nir);

bool radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
                          struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);
#endif
File diff suppressed because it is too large Load Diff
+18 -20
View File
@@ -21,38 +21,36 @@
* IN THE SOFTWARE.
*/
#include "ac_shader_args.h"
#include "radv_constants.h"
#include "util/list.h"
#include "compiler/shader_enums.h"
#include "util/list.h"
#include "util/macros.h"
#include "ac_shader_args.h"
#include "amd_family.h"
#include "radv_constants.h"
/**
 * RADV-specific shader argument state wrapping the common ac_shader_args.
 *
 * radv_shader_args_from_ac() recovers this struct from a pointer to the
 * embedded \c ac member.
 */
struct radv_shader_args {
   struct ac_shader_args ac;
   struct radv_shader_info *shader_info;
   const struct radv_nir_compiler_options *options;

   struct ac_arg descriptor_sets[MAX_SETS];
   struct ac_arg ring_offsets;

   /* Streamout */
   struct ac_arg streamout_buffers;

   /* NGG GS */
   struct ac_arg ngg_gs_state;

   bool is_gs_copy_shader;
   bool is_trap_handler_shader;
};
/**
 * Return the radv_shader_args that embeds \p args as its \c ac member.
 *
 * \p args must point at the \c ac field of a struct radv_shader_args.
 */
static inline struct radv_shader_args *
radv_shader_args_from_ac(struct ac_shader_args *args)
{
   return container_of(args, struct radv_shader_args, ac);
}
/* Declare the shader arguments for the given stage (and optional previous stage). */
void radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
                              bool has_previous_stage, gl_shader_stage previous_stage);
+6 -10
View File
@@ -26,17 +26,13 @@
extern "C" {
#endif
/* LLVM compiler lifetime management. */
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
                             enum radeon_family family, enum ac_target_machine_options tm_options,
                             unsigned wave_size);
void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler);

/* Compile an LLVM module; the ELF image is returned through pelf_buffer/pelf_size. */
bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
                         size_t *pelf_size);
#ifdef __cplusplus
}
File diff suppressed because it is too large Load Diff
+432 -478
View File
File diff suppressed because it is too large Load Diff
+52 -59
View File
@@ -21,117 +21,110 @@
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include "radv_private.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "vk_enum_to_str.h"
#include "util/u_math.h"
/**
 * Log an error message.
 *
 * printf-style front end for radv_loge_v().
 */
void radv_printflike(1, 2) radv_loge(const char *format, ...)
{
   va_list va;

   va_start(va, format);
   radv_loge_v(format, va);
   va_end(va);
}
/**
 * va_list variant of radv_loge(): writes "vk: error: ", the formatted
 * message and a newline to stderr.
 *
 * \see radv_loge()
 */
void
radv_loge_v(const char *format, va_list va)
{
   fprintf(stderr, "vk: error: ");
   /* va is consumed exactly once here. */
   vfprintf(stderr, format, va);
   fprintf(stderr, "\n");
}
/**
 * Log an informational message.
 *
 * printf-style front end for radv_logi_v().
 */
void radv_printflike(1, 2) radv_logi(const char *format, ...)
{
   va_list va;

   va_start(va, format);
   radv_logi_v(format, va);
   va_end(va);
}
/**
 * va_list variant of radv_logi(): writes "radv: info: ", the formatted
 * message and a newline to stderr.
 *
 * \see radv_logi()
 */
void
radv_logi_v(const char *format, va_list va)
{
   fprintf(stderr, "radv: info: ");
   /* va is consumed exactly once here. */
   vfprintf(stderr, format, va);
   fprintf(stderr, "\n");
}
/**
 * Print a "FINISHME" note for unimplemented functionality to stderr,
 * prefixed with the given source location.
 *
 * The formatted message is truncated to fit a 256-byte buffer.
 */
void radv_printflike(3, 4) __radv_finishme(const char *file, int line, const char *format, ...)
{
   va_list ap;
   char buffer[256];

   va_start(ap, format);
   vsnprintf(buffer, sizeof(buffer), format, ap);
   va_end(ap);

   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}
VkResult
__vk_errorv(struct radv_instance *instance, const void *object,
VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
int line, const char *format, va_list ap)
__vk_errorv(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
VkResult error, const char *file, int line, const char *format, va_list ap)
{
char buffer[256];
char report[512];
char buffer[256];
char report[512];
const char *error_str = vk_Result_to_str(error);
const char *error_str = vk_Result_to_str(error);
#ifndef DEBUG
if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
return error;
if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
return error;
#endif
if (format) {
vsnprintf(buffer, sizeof(buffer), format, ap);
if (format) {
vsnprintf(buffer, sizeof(buffer), format, ap);
snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line,
buffer, error_str);
} else {
snprintf(report, sizeof(report), "%s:%d: %s", file, line,
error_str);
}
snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line, buffer, error_str);
} else {
snprintf(report, sizeof(report), "%s:%d: %s", file, line, error_str);
}
if (instance) {
vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT,
object, line, 0, "radv", report);
}
if (instance) {
vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT, object, line, 0, "radv",
report);
}
fprintf(stderr, "%s\n", report);
fprintf(stderr, "%s\n", report);
return error;
return error;
}
VkResult
__vk_errorf(struct radv_instance *instance, const void *object,
VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
int line, const char *format, ...)
__vk_errorf(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
VkResult error, const char *file, int line, const char *format, ...)
{
va_list ap;
va_list ap;
va_start(ap, format);
__vk_errorv(instance, object, type, error, file, line, format, ap);
va_end(ap);
va_start(ap, format);
__vk_errorv(instance, object, type, error, file, line, format, ap);
va_end(ap);
return error;
return error;
}
+163 -215
View File
@@ -23,315 +23,263 @@
* IN THE SOFTWARE.
*/
#include "radv_private.h"
#include "radv_meta.h"
#include "wsi_common.h"
#include "vk_util.h"
#include "util/macros.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "vk_util.h"
#include "wsi_common.h"
/* Proc-address callback handed to the common WSI code: looks pName up on the
 * physical device's instance. */
static PFN_vkVoidFunction
radv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
}
/* WSI callback: when the device uses a global BO list, make the memory's BO
 * resident (ownership != 0) or non-resident (ownership == 0). Otherwise a no-op. */
static void
radv_wsi_set_memory_ownership(VkDevice _device, VkDeviceMemory _mem, VkBool32 ownership)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

   if (device->use_global_bo_list) {
      device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
   }
}
/**
 * Initialise the common WSI state of a physical device.
 *
 * On success, modifier support is enabled for GFX9 and newer and the
 * memory-ownership callback is installed.
 *
 * \return the wsi_device_init() error on failure, VK_SUCCESS otherwise.
 */
VkResult
radv_init_wsi(struct radv_physical_device *physical_device)
{
   VkResult result =
      wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device),
                      radv_wsi_proc_addr, &physical_device->instance->vk.alloc,
                      physical_device->master_fd, &physical_device->instance->dri_options, false);
   if (result != VK_SUCCESS)
      return result;

   physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
   physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
   return VK_SUCCESS;
}
/* Tear down the common WSI state set up by radv_init_wsi(). */
void
radv_finish_wsi(struct radv_physical_device *physical_device)
{
   wsi_device_finish(&physical_device->wsi_device, &physical_device->instance->vk.alloc);
}
/* vkDestroySurfaceKHR: free the surface with pAllocator, falling back to the
 * instance allocator. */
void
radv_DestroySurfaceKHR(VkInstance _instance, VkSurfaceKHR _surface,
                       const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

   vk_free2(&instance->vk.alloc, pAllocator, surface);
}
/* vkGetPhysicalDeviceSurfaceSupportKHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex,
                                        VkSurfaceKHR surface, VkBool32 *pSupported)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_support(&device->wsi_device, queueFamilyIndex, surface,
                                         pSupported);
}
/* vkGetPhysicalDeviceSurfaceCapabilitiesKHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                             VkSurfaceCapabilitiesKHR *pSurfaceCapabilities)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_capabilities(&device->wsi_device, surface, pSurfaceCapabilities);
}
/* vkGetPhysicalDeviceSurfaceCapabilities2KHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceCapabilities2KHR(VkPhysicalDevice physicalDevice,
                                              const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
                                              VkSurfaceCapabilities2KHR *pSurfaceCapabilities)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_capabilities2(&device->wsi_device, pSurfaceInfo,
                                               pSurfaceCapabilities);
}
/* vkGetPhysicalDeviceSurfaceCapabilities2EXT: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceCapabilities2EXT(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                              VkSurfaceCapabilities2EXT *pSurfaceCapabilities)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_capabilities2ext(&device->wsi_device, surface,
                                                  pSurfaceCapabilities);
}
/* vkGetPhysicalDeviceSurfaceFormatsKHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                        uint32_t *pSurfaceFormatCount,
                                        VkSurfaceFormatKHR *pSurfaceFormats)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_formats(&device->wsi_device, surface, pSurfaceFormatCount,
                                         pSurfaceFormats);
}
/* vkGetPhysicalDeviceSurfaceFormats2KHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfaceFormats2KHR(VkPhysicalDevice physicalDevice,
                                         const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
                                         uint32_t *pSurfaceFormatCount,
                                         VkSurfaceFormat2KHR *pSurfaceFormats)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, pSurfaceFormatCount,
                                          pSurfaceFormats);
}
/* vkGetPhysicalDeviceSurfacePresentModesKHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                             uint32_t *pPresentModeCount,
                                             VkPresentModeKHR *pPresentModes)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_surface_present_modes(&device->wsi_device, surface, pPresentModeCount,
                                               pPresentModes);
}
/* vkCreateSwapchainKHR: create the swapchain through the common WSI layer,
 * using pAllocator when given and the device allocator otherwise. */
VkResult
radv_CreateSwapchainKHR(VkDevice _device, const VkSwapchainCreateInfoKHR *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator, VkSwapchainKHR *pSwapchain)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkAllocationCallbacks *alloc;
   if (pAllocator)
      alloc = pAllocator;
   else
      alloc = &device->vk.alloc;

   return wsi_common_create_swapchain(&device->physical_device->wsi_device,
                                      radv_device_to_handle(device), pCreateInfo, alloc,
                                      pSwapchain);
}
/* vkDestroySwapchainKHR: destroy the swapchain through the common WSI layer,
 * using pAllocator when given and the device allocator otherwise. */
void
radv_DestroySwapchainKHR(VkDevice _device, VkSwapchainKHR swapchain,
                         const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkAllocationCallbacks *alloc;

   if (pAllocator)
      alloc = pAllocator;
   else
      alloc = &device->vk.alloc;

   wsi_common_destroy_swapchain(_device, swapchain, alloc);
}
/* vkGetSwapchainImagesKHR: forwarded to the common WSI layer (device is unused). */
VkResult
radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain,
                           uint32_t *pSwapchainImageCount, VkImage *pSwapchainImages)
{
   return wsi_common_get_images(swapchain, pSwapchainImageCount, pSwapchainImages);
}
/* vkAcquireNextImageKHR: packs the arguments into a VkAcquireNextImageInfoKHR
 * (deviceMask = 0) and defers to radv_AcquireNextImage2KHR(). */
VkResult
radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout,
                         VkSemaphore semaphore, VkFence fence, uint32_t *pImageIndex)
{
   VkAcquireNextImageInfoKHR acquire_info = {
      .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
      .swapchain = swapchain,
      .timeout = timeout,
      .semaphore = semaphore,
      .fence = fence,
      .deviceMask = 0,
   };

   return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
}
/**
 * vkAcquireNextImage2KHR: acquire an image through the common WSI layer.
 *
 * When the acquire returns VK_SUCCESS or VK_SUBOPTIMAL_KHR, the optional fence
 * and semaphore are signalled immediately via their syncobj. For each, the
 * temporary payload is used if it is set, otherwise the permanent one.
 *
 * \return the result of wsi_common_acquire_next_image2().
 */
VkResult
radv_AcquireNextImage2KHR(VkDevice _device, const VkAcquireNextImageInfoKHR *pAcquireInfo,
                          uint32_t *pImageIndex)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_physical_device *pdevice = device->physical_device;
   RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, pAcquireInfo->semaphore);

   VkResult result =
      wsi_common_acquire_next_image2(&pdevice->wsi_device, _device, pAcquireInfo, pImageIndex);

   if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
      if (fence) {
         struct radv_fence_part *part =
            fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

         device->ws->signal_syncobj(device->ws, part->syncobj, 0);
      }
      if (semaphore) {
         struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
                                               ? &semaphore->temporary
                                               : &semaphore->permanent;

         switch (part->kind) {
         case RADV_SEMAPHORE_NONE:
            /* Do not need to do anything. */
            break;
         case RADV_SEMAPHORE_TIMELINE:
         case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
            unreachable("WSI only allows binary semaphores.");
         case RADV_SEMAPHORE_SYNCOBJ:
            device->ws->signal_syncobj(device->ws, part->syncobj, 0);
            break;
         }
      }
   }
   return result;
}
/* vkQueuePresentKHR: forwarded to the common WSI layer with the queue's
 * device and queue family index. */
VkResult
radv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
                                   radv_device_to_handle(queue->device), _queue,
                                   queue->queue_family_index, pPresentInfo);
}
/* vkGetDeviceGroupPresentCapabilitiesKHR: reports a present mask of 0x1 in
 * entry 0 (all other entries zero) and only the LOCAL present mode. */
VkResult
radv_GetDeviceGroupPresentCapabilitiesKHR(VkDevice device,
                                          VkDeviceGroupPresentCapabilitiesKHR *pCapabilities)
{
   memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask));
   pCapabilities->presentMask[0] = 0x1;
   pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;

   return VK_SUCCESS;
}
/* vkGetDeviceGroupSurfacePresentModesKHR: always reports only the LOCAL
 * present mode, regardless of the surface. */
VkResult
radv_GetDeviceGroupSurfacePresentModesKHR(VkDevice device, VkSurfaceKHR surface,
                                          VkDeviceGroupPresentModeFlagsKHR *pModes)
{
   *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;

   return VK_SUCCESS;
}
/* vkGetPhysicalDevicePresentRectanglesKHR: forwarded to the common WSI layer. */
VkResult
radv_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
                                           uint32_t *pRectCount, VkRect2D *pRects)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);

   return wsi_common_get_present_rectangles(&device->wsi_device, surface, pRectCount, pRects);
}
+145 -221
View File
@@ -20,39 +20,36 @@
* OF THIS SOFTWARE.
*/
#include <amdgpu.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <xf86drmMode.h>
#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "radv_cs.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"
#include "wsi_common_display.h"
/* 1/96 * 25.4, i.e. millimetres per pixel at 96 pixels per inch (25.4 mm). */
#define MM_PER_PIXEL (1.0 / 96.0 * 25.4)
/* vkGetPhysicalDeviceDisplayPropertiesKHR: forwarded to the WSI display code. */
VkResult
radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device,
                                           uint32_t *property_count,
                                           VkDisplayPropertiesKHR *properties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);

   return wsi_display_get_physical_device_display_properties(physical_device, &pdevice->wsi_device,
                                                             property_count, properties);
}
VkResult
@@ -60,122 +57,84 @@ radv_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayProperties2KHR *properties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_physical_device_display_properties2(
physical_device,
&pdevice->wsi_device,
property_count,
properties);
return wsi_display_get_physical_device_display_properties2(physical_device, &pdevice->wsi_device,
property_count, properties);
}
VkResult
radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(
VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPlanePropertiesKHR *properties)
radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPlanePropertiesKHR *properties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_physical_device_display_plane_properties(
physical_device,
&pdevice->wsi_device,
property_count,
properties);
return wsi_display_get_physical_device_display_plane_properties(
physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(
VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPlaneProperties2KHR *properties)
radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPlaneProperties2KHR *properties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_physical_device_display_plane_properties2(
physical_device,
&pdevice->wsi_device,
property_count,
properties);
return wsi_display_get_physical_device_display_plane_properties2(
physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device,
uint32_t plane_index,
uint32_t *display_count,
VkDisplayKHR *displays)
radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, uint32_t plane_index,
uint32_t *display_count, VkDisplayKHR *displays)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_display_plane_supported_displays(
physical_device,
&pdevice->wsi_device,
plane_index,
display_count,
displays);
return wsi_display_get_display_plane_supported_displays(physical_device, &pdevice->wsi_device,
plane_index, display_count, displays);
}
VkResult
radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device,
VkDisplayKHR display,
uint32_t *property_count,
VkDisplayModePropertiesKHR *properties)
radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
uint32_t *property_count, VkDisplayModePropertiesKHR *properties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_display_mode_properties(physical_device,
&pdevice->wsi_device,
display,
property_count,
properties);
return wsi_display_get_display_mode_properties(physical_device, &pdevice->wsi_device, display,
property_count, properties);
}
VkResult
radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device,
VkDisplayKHR display,
uint32_t *property_count,
VkDisplayModeProperties2KHR *properties)
radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
uint32_t *property_count, VkDisplayModeProperties2KHR *properties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_get_display_mode_properties2(physical_device,
&pdevice->wsi_device,
display,
property_count,
properties);
return wsi_display_get_display_mode_properties2(physical_device, &pdevice->wsi_device, display,
property_count, properties);
}
VkResult
radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device,
VkDisplayKHR display,
radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
const VkDisplayModeCreateInfoKHR *create_info,
const VkAllocationCallbacks *allocator,
VkDisplayModeKHR *mode)
const VkAllocationCallbacks *allocator, VkDisplayModeKHR *mode)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_display_create_display_mode(physical_device,
&pdevice->wsi_device,
display,
create_info,
allocator,
mode);
return wsi_display_create_display_mode(physical_device, &pdevice->wsi_device, display,
create_info, allocator, mode);
}
VkResult
radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device,
VkDisplayModeKHR mode_khr,
radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, VkDisplayModeKHR mode_khr,
uint32_t plane_index,
VkDisplayPlaneCapabilitiesKHR *capabilities)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_get_display_plane_capabilities(physical_device,
&pdevice->wsi_device,
mode_khr,
plane_index,
capabilities);
return wsi_get_display_plane_capabilities(physical_device, &pdevice->wsi_device, mode_khr,
plane_index, capabilities);
}
VkResult
@@ -183,187 +142,152 @@ radv_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device,
const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
VkDisplayPlaneCapabilities2KHR *capabilities)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_get_display_plane_capabilities2(physical_device,
&pdevice->wsi_device,
pDisplayPlaneInfo,
capabilities);
return wsi_get_display_plane_capabilities2(physical_device, &pdevice->wsi_device,
pDisplayPlaneInfo, capabilities);
}
VkResult
radv_CreateDisplayPlaneSurfaceKHR(
VkInstance _instance,
const VkDisplaySurfaceCreateInfoKHR *create_info,
const VkAllocationCallbacks *allocator,
VkSurfaceKHR *surface)
radv_CreateDisplayPlaneSurfaceKHR(VkInstance _instance,
const VkDisplaySurfaceCreateInfoKHR *create_info,
const VkAllocationCallbacks *allocator, VkSurfaceKHR *surface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
if (allocator)
alloc = allocator;
else
alloc = &instance->vk.alloc;
if (allocator)
alloc = allocator;
else
alloc = &instance->vk.alloc;
return wsi_create_display_surface(_instance, alloc,
create_info, surface);
return wsi_create_display_surface(_instance, alloc, create_info, surface);
}
VkResult
radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device,
VkDisplayKHR display)
radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device, VkDisplayKHR display)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_release_display(physical_device,
&pdevice->wsi_device,
display);
return wsi_release_display(physical_device, &pdevice->wsi_device, display);
}
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
VkResult
radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device,
Display *dpy,
VkDisplayKHR display)
radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, VkDisplayKHR display)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_acquire_xlib_display(physical_device,
&pdevice->wsi_device,
dpy,
display);
return wsi_acquire_xlib_display(physical_device, &pdevice->wsi_device, dpy, display);
}
VkResult
radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device,
Display *dpy,
RROutput output,
VkDisplayKHR *display)
radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, RROutput output,
VkDisplayKHR *display)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
return wsi_get_randr_output_display(physical_device,
&pdevice->wsi_device,
dpy,
output,
display);
return wsi_get_randr_output_display(physical_device, &pdevice->wsi_device, dpy, output, display);
}
#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
/* VK_EXT_display_control */
VkResult
radv_DisplayPowerControlEXT(VkDevice _device,
VkDisplayKHR display,
const VkDisplayPowerInfoEXT *display_power_info)
radv_DisplayPowerControlEXT(VkDevice _device, VkDisplayKHR display,
const VkDisplayPowerInfoEXT *display_power_info)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_device, device, _device);
return wsi_display_power_control(_device,
&device->physical_device->wsi_device,
display,
display_power_info);
return wsi_display_power_control(_device, &device->physical_device->wsi_device, display,
display_power_info);
}
VkResult
radv_RegisterDeviceEventEXT(VkDevice _device,
const VkDeviceEventInfoEXT *device_event_info,
const VkAllocationCallbacks *allocator,
VkFence *_fence)
radv_RegisterDeviceEventEXT(VkDevice _device, const VkDeviceEventInfoEXT *device_event_info,
const VkAllocationCallbacks *allocator, VkFence *_fence)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult ret;
int fd;
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult ret;
int fd;
ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = &(VkExportFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
.handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
},
}, allocator, _fence);
if (ret != VK_SUCCESS)
return ret;
ret = radv_CreateFence(_device,
&(VkFenceCreateInfo){
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext =
&(VkExportFenceCreateInfo){
.sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
.handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
},
},
allocator, _fence);
if (ret != VK_SUCCESS)
return ret;
RADV_FROM_HANDLE(radv_fence, fence, *_fence);
RADV_FROM_HANDLE(radv_fence, fence, *_fence);
assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
ret = VK_ERROR_OUT_OF_HOST_MEMORY;
} else {
ret = wsi_register_device_event(_device,
&device->physical_device->wsi_device,
device_event_info,
allocator,
NULL,
fd);
close(fd);
}
if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
ret = VK_ERROR_OUT_OF_HOST_MEMORY;
} else {
ret = wsi_register_device_event(_device, &device->physical_device->wsi_device,
device_event_info, allocator, NULL, fd);
close(fd);
}
if (ret != VK_SUCCESS)
radv_DestroyFence(_device, *_fence, allocator);
if (ret != VK_SUCCESS)
radv_DestroyFence(_device, *_fence, allocator);
return ret;
return ret;
}
VkResult
radv_RegisterDisplayEventEXT(VkDevice _device,
VkDisplayKHR display,
const VkDisplayEventInfoEXT *display_event_info,
const VkAllocationCallbacks *allocator,
VkFence *_fence)
radv_RegisterDisplayEventEXT(VkDevice _device, VkDisplayKHR display,
const VkDisplayEventInfoEXT *display_event_info,
const VkAllocationCallbacks *allocator, VkFence *_fence)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult ret;
int fd;
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult ret;
int fd;
ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = &(VkExportFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
.handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
},
}, allocator, _fence);
if (ret != VK_SUCCESS)
return ret;
ret = radv_CreateFence(_device,
&(VkFenceCreateInfo){
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext =
&(VkExportFenceCreateInfo){
.sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
.handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
},
},
allocator, _fence);
if (ret != VK_SUCCESS)
return ret;
RADV_FROM_HANDLE(radv_fence, fence, *_fence);
RADV_FROM_HANDLE(radv_fence, fence, *_fence);
assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
ret = VK_ERROR_OUT_OF_HOST_MEMORY;
} else {
ret = wsi_register_display_event(_device,
&device->physical_device->wsi_device,
display,
display_event_info,
allocator,
NULL,
fd);
close(fd);
}
if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
ret = VK_ERROR_OUT_OF_HOST_MEMORY;
} else {
ret = wsi_register_display_event(_device, &device->physical_device->wsi_device, display,
display_event_info, allocator, NULL, fd);
close(fd);
}
if (ret != VK_SUCCESS)
radv_DestroyFence(_device, *_fence, allocator);
if (ret != VK_SUCCESS)
radv_DestroyFence(_device, *_fence, allocator);
return ret;
return ret;
}
VkResult
radv_GetSwapchainCounterEXT(VkDevice _device,
VkSwapchainKHR swapchain,
VkSurfaceCounterFlagBitsEXT flag_bits,
uint64_t *value)
radv_GetSwapchainCounterEXT(VkDevice _device, VkSwapchainKHR swapchain,
VkSurfaceCounterFlagBitsEXT flag_bits, uint64_t *value)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_device, device, _device);
return wsi_get_swapchain_counter(_device,
&device->physical_device->wsi_device,
swapchain,
flag_bits,
value);
return wsi_get_swapchain_counter(_device, &device->physical_device->wsi_device, swapchain,
flag_bits, value);
}
+8 -10
View File
@@ -23,24 +23,22 @@
* IN THE SOFTWARE.
*/
#include "wsi_common_wayland.h"
#include "radv_private.h"
#include "wsi_common_wayland.h"
VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
struct wl_display* display)
VkBool32
radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
struct wl_display *display)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
return wsi_wl_get_presentation_support(&physical_device->wsi_device, display);
}
VkResult radv_CreateWaylandSurfaceKHR(
VkInstance _instance,
const VkWaylandSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
VkResult
radv_CreateWaylandSurfaceKHR(VkInstance _instance, const VkWaylandSurfaceCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
+25 -34
View File
@@ -27,64 +27,55 @@
#include <X11/Xlib-xcb.h>
#include <X11/xshmfence.h>
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
#include <xcb/xcb.h>
#include "wsi_common_x11.h"
#include "radv_private.h"
#include "wsi_common_x11.h"
VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
xcb_connection_t* connection,
xcb_visualid_t visual_id)
VkBool32
radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
xcb_connection_t *connection,
xcb_visualid_t visual_id)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
return wsi_get_physical_device_xcb_presentation_support(
&device->wsi_device,
queueFamilyIndex,
connection, visual_id);
return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
connection, visual_id);
}
VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
Display* dpy,
VisualID visualID)
VkBool32
radv_GetPhysicalDeviceXlibPresentationSupportKHR(VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex, Display *dpy,
VisualID visualID)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
return wsi_get_physical_device_xcb_presentation_support(
&device->wsi_device,
queueFamilyIndex,
XGetXCBConnection(dpy), visualID);
return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
XGetXCBConnection(dpy), visualID);
}
VkResult radv_CreateXcbSurfaceKHR(
VkInstance _instance,
const VkXcbSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
VkResult
radv_CreateXcbSurfaceKHR(VkInstance _instance, const VkXcbSurfaceCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
alloc = pAllocator;
alloc = pAllocator;
else
alloc = &instance->vk.alloc;
alloc = &instance->vk.alloc;
return wsi_create_xcb_surface(alloc, pCreateInfo, pSurface);
}
VkResult radv_CreateXlibSurfaceKHR(
VkInstance _instance,
const VkXlibSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
VkResult
radv_CreateXlibSurfaceKHR(VkInstance _instance, const VkXlibSurfaceCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
@@ -92,9 +83,9 @@ VkResult radv_CreateXlibSurfaceKHR(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
alloc = pAllocator;
alloc = pAllocator;
else
alloc = &instance->vk.alloc;
alloc = &instance->vk.alloc;
return wsi_create_xlib_surface(alloc, pCreateInfo, pSurface);
}
File diff suppressed because it is too large Load Diff
+163 -165
View File
@@ -28,13 +28,14 @@
#define VK_FORMAT_H
#include <assert.h>
#include <vulkan/vulkan.h>
#include <util/macros.h>
#include <vulkan/util/vk_format.h>
#include <vulkan/vulkan.h>
static inline const struct util_format_description *vk_format_description(VkFormat format)
static inline const struct util_format_description *
vk_format_description(VkFormat format)
{
return util_format_description(vk_format_to_pipe_format(format));
return util_format_description(vk_format_to_pipe_format(format));
}
/**
@@ -43,7 +44,7 @@ static inline const struct util_format_description *vk_format_description(VkForm
static inline unsigned
vk_format_get_blocksizebits(VkFormat format)
{
return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
}
/**
@@ -52,19 +53,19 @@ vk_format_get_blocksizebits(VkFormat format)
static inline unsigned
vk_format_get_blocksize(VkFormat format)
{
return util_format_get_blocksize(vk_format_to_pipe_format(format));
return util_format_get_blocksize(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockwidth(VkFormat format)
{
return util_format_get_blockwidth(vk_format_to_pipe_format(format));
return util_format_get_blockwidth(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockheight(VkFormat format)
{
return util_format_get_blockheight(vk_format_to_pipe_format(format));
return util_format_get_blockheight(vk_format_to_pipe_format(format));
}
/**
@@ -74,259 +75,256 @@ vk_format_get_blockheight(VkFormat format)
static inline int
vk_format_get_first_non_void_channel(VkFormat format)
{
return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
}
static inline enum pipe_swizzle
radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], VkComponentSwizzle vk_swiz)
radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4],
VkComponentSwizzle vk_swiz)
{
if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
vk_swiz = component;
switch (vk_swiz) {
case VK_COMPONENT_SWIZZLE_ZERO:
return PIPE_SWIZZLE_0;
case VK_COMPONENT_SWIZZLE_ONE:
return PIPE_SWIZZLE_1;
case VK_COMPONENT_SWIZZLE_R:
case VK_COMPONENT_SWIZZLE_G:
case VK_COMPONENT_SWIZZLE_B:
case VK_COMPONENT_SWIZZLE_A:
return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
default:
unreachable("Illegal swizzle");
}
if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
vk_swiz = component;
switch (vk_swiz) {
case VK_COMPONENT_SWIZZLE_ZERO:
return PIPE_SWIZZLE_0;
case VK_COMPONENT_SWIZZLE_ONE:
return PIPE_SWIZZLE_1;
case VK_COMPONENT_SWIZZLE_R:
case VK_COMPONENT_SWIZZLE_G:
case VK_COMPONENT_SWIZZLE_B:
case VK_COMPONENT_SWIZZLE_A:
return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
default:
unreachable("Illegal swizzle");
}
}
static inline void vk_format_compose_swizzles(const VkComponentMapping *mapping,
const unsigned char swz[4],
enum pipe_swizzle dst[4])
static inline void
vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4],
enum pipe_swizzle dst[4])
{
dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
}
static inline bool
vk_format_is_compressed(VkFormat format)
{
return util_format_is_compressed(vk_format_to_pipe_format(format));
return util_format_is_compressed(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_subsampled(VkFormat format)
{
return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_depth_only(VkFormat format)
{
switch (format) {
case VK_FORMAT_D16_UNORM_S8_UINT:
return VK_FORMAT_D16_UNORM;
case VK_FORMAT_D24_UNORM_S8_UINT:
return VK_FORMAT_X8_D24_UNORM_PACK32;
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return VK_FORMAT_D32_SFLOAT;
default:
return format;
}
switch (format) {
case VK_FORMAT_D16_UNORM_S8_UINT:
return VK_FORMAT_D16_UNORM;
case VK_FORMAT_D24_UNORM_S8_UINT:
return VK_FORMAT_X8_D24_UNORM_PACK32;
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return VK_FORMAT_D32_SFLOAT;
default:
return format;
}
}
static inline bool
vk_format_is_int(VkFormat format)
{
return util_format_is_pure_integer(vk_format_to_pipe_format(format));
return util_format_is_pure_integer(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_uint(VkFormat format)
{
return util_format_is_pure_uint(vk_format_to_pipe_format(format));
return util_format_is_pure_uint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_sint(VkFormat format)
{
return util_format_is_pure_sint(vk_format_to_pipe_format(format));
return util_format_is_pure_sint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_unorm(VkFormat format)
{
return util_format_is_unorm(vk_format_to_pipe_format(format));
return util_format_is_unorm(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_srgb(VkFormat format)
{
return util_format_is_srgb(vk_format_to_pipe_format(format));
return util_format_is_srgb(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_no_srgb(VkFormat format)
{
switch(format) {
case VK_FORMAT_R8_SRGB:
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_R8G8_SRGB:
return VK_FORMAT_R8G8_UNORM;
case VK_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_UNORM;
case VK_FORMAT_R8G8B8A8_SRGB:
return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_UNORM;
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
case VK_FORMAT_BC2_SRGB_BLOCK:
return VK_FORMAT_BC2_UNORM_BLOCK;
case VK_FORMAT_BC3_SRGB_BLOCK:
return VK_FORMAT_BC3_UNORM_BLOCK;
case VK_FORMAT_BC7_SRGB_BLOCK:
return VK_FORMAT_BC7_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
default:
assert(!vk_format_is_srgb(format));
return format;
}
switch (format) {
case VK_FORMAT_R8_SRGB:
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_R8G8_SRGB:
return VK_FORMAT_R8G8_UNORM;
case VK_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_UNORM;
case VK_FORMAT_R8G8B8A8_SRGB:
return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_UNORM;
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
case VK_FORMAT_BC2_SRGB_BLOCK:
return VK_FORMAT_BC2_UNORM_BLOCK;
case VK_FORMAT_BC3_SRGB_BLOCK:
return VK_FORMAT_BC3_UNORM_BLOCK;
case VK_FORMAT_BC7_SRGB_BLOCK:
return VK_FORMAT_BC7_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
default:
assert(!vk_format_is_srgb(format));
return format;
}
}
static inline VkFormat
vk_format_stencil_only(VkFormat format)
{
return VK_FORMAT_S8_UINT;
return VK_FORMAT_S8_UINT;
}
static inline unsigned
vk_format_get_component_bits(VkFormat format,
enum util_format_colorspace colorspace,
unsigned component)
vk_format_get_component_bits(VkFormat format, enum util_format_colorspace colorspace,
unsigned component)
{
const struct util_format_description *desc = vk_format_description(format);
enum util_format_colorspace desc_colorspace;
const struct util_format_description *desc = vk_format_description(format);
enum util_format_colorspace desc_colorspace;
assert(format);
if (!format) {
return 0;
}
assert(format);
if (!format) {
return 0;
}
assert(component < 4);
assert(component < 4);
/* Treat RGB and SRGB as equivalent. */
if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
colorspace = UTIL_FORMAT_COLORSPACE_RGB;
}
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
} else {
desc_colorspace = desc->colorspace;
}
/* Treat RGB and SRGB as equivalent. */
if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
colorspace = UTIL_FORMAT_COLORSPACE_RGB;
}
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
} else {
desc_colorspace = desc->colorspace;
}
if (desc_colorspace != colorspace) {
return 0;
}
if (desc_colorspace != colorspace) {
return 0;
}
switch (desc->swizzle[component]) {
case PIPE_SWIZZLE_X:
return desc->channel[0].size;
case PIPE_SWIZZLE_Y:
return desc->channel[1].size;
case PIPE_SWIZZLE_Z:
return desc->channel[2].size;
case PIPE_SWIZZLE_W:
return desc->channel[3].size;
default:
return 0;
}
switch (desc->swizzle[component]) {
case PIPE_SWIZZLE_X:
return desc->channel[0].size;
case PIPE_SWIZZLE_Y:
return desc->channel[1].size;
case PIPE_SWIZZLE_Z:
return desc->channel[2].size;
case PIPE_SWIZZLE_W:
return desc->channel[3].size;
default:
return 0;
}
}
static inline VkFormat
vk_to_non_srgb_format(VkFormat format)
{
switch(format) {
case VK_FORMAT_R8_SRGB :
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_R8G8_SRGB:
return VK_FORMAT_R8G8_UNORM;
case VK_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_UNORM;
case VK_FORMAT_R8G8B8A8_SRGB :
return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_UNORM;
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
default:
return format;
}
switch (format) {
case VK_FORMAT_R8_SRGB:
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_R8G8_SRGB:
return VK_FORMAT_R8G8_UNORM;
case VK_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_UNORM;
case VK_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_UNORM;
case VK_FORMAT_R8G8B8A8_SRGB:
return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_UNORM;
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
default:
return format;
}
}
static inline unsigned
vk_format_get_nr_components(VkFormat format)
{
return util_format_get_nr_components(vk_format_to_pipe_format(format));
return util_format_get_nr_components(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_plane_count(VkFormat format)
{
return util_format_get_num_planes(vk_format_to_pipe_format(format));
return util_format_get_num_planes(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_plane_width(VkFormat format, unsigned plane,
unsigned width)
vk_format_get_plane_width(VkFormat format, unsigned plane, unsigned width)
{
return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
}
static inline unsigned
vk_format_get_plane_height(VkFormat format, unsigned plane,
unsigned height)
vk_format_get_plane_height(VkFormat format, unsigned plane, unsigned height)
{
return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
}
static inline VkFormat
vk_format_get_plane_format(VkFormat format, unsigned plane_id)
{
assert(plane_id < vk_format_get_plane_count(format));
assert(plane_id < vk_format_get_plane_count(format));
switch(format) {
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
return VK_FORMAT_R16_UNORM;
case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
default:
assert(vk_format_get_plane_count(format) == 1);
return format;
}
switch (format) {
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
return VK_FORMAT_R8_UNORM;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
return VK_FORMAT_R16_UNORM;
case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
default:
assert(vk_format_get_plane_count(format) == 1);
return format;
}
}
#endif /* VK_FORMAT_H */
File diff suppressed because it is too large Load Diff
+30 -31
View File
@@ -31,46 +31,45 @@
#include "radv_amdgpu_winsys.h"
struct radv_amdgpu_map_range {
uint64_t offset;
uint64_t size;
struct radv_amdgpu_winsys_bo *bo;
uint64_t bo_offset;
uint64_t offset;
uint64_t size;
struct radv_amdgpu_winsys_bo *bo;
uint64_t bo_offset;
};
/*
 * amdgpu winsys buffer object.
 *
 * `base` must stay the first member: radv_amdgpu_winsys_bo() converts a
 * `struct radeon_winsys_bo *` to this type with a plain pointer cast.
 *
 * `is_virtual` selects which arm of the anonymous union is meaningful.
 */
struct radv_amdgpu_winsys_bo {
   struct radeon_winsys_bo base;
   amdgpu_va_handle va_handle;
   uint64_t size;
   bool is_virtual;
   uint8_t priority;
   int ref_count;

   union {
      /* physical bo */
      struct {
         amdgpu_bo_handle bo;
         bool is_shared;
         uint32_t bo_handle;
      };
      /* virtual bo */
      struct {
         /* Growable array of mapped ranges (count / allocated capacity). */
         struct radv_amdgpu_map_range *ranges;
         uint32_t range_count;
         uint32_t range_capacity;

         /* Growable array of BO pointers (count / allocated capacity). */
         struct radv_amdgpu_winsys_bo **bos;
         uint32_t bo_count;
         uint32_t bo_capacity;
      };
   };
};
/*
 * Downcast a generic winsys BO pointer to the amdgpu implementation type.
 * This is an unchecked cast; it relies on `base` being the first member of
 * struct radv_amdgpu_winsys_bo.
 */
static inline struct radv_amdgpu_winsys_bo *
radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo)
{
   return (struct radv_amdgpu_winsys_bo *)bo;
}
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws);
File diff suppressed because it is too large Load Diff
+13 -16
View File
@@ -29,37 +29,34 @@
#ifndef RADV_AMDGPU_CS_H
#define RADV_AMDGPU_CS_H
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <amdgpu.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_winsys.h"
#include "radv_radeon_winsys.h"
/* Second dimension of radv_amdgpu_ctx::last_submission. */
enum { MAX_RINGS_PER_TYPE = 8 };
/*
 * A libdrm fence paired with a pointer to a 64-bit fence value.
 *
 * NOTE(review): user_ptr is volatile, presumably because the value is
 * written asynchronously by the GPU/kernel -- confirm against the CS code.
 */
struct radv_amdgpu_fence {
   struct amdgpu_cs_fence fence;
   volatile uint64_t *user_ptr;
};
/*
 * amdgpu submission context.
 *
 * last_submission holds one fence per (hardware IP type, ring) pair; the
 * first dimension covers IP types 0..AMDGPU_HW_IP_DMA inclusive and the
 * second is bounded by MAX_RINGS_PER_TYPE.
 */
struct radv_amdgpu_ctx {
   struct radv_amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];

   struct radeon_winsys_bo *fence_bo;
   uint64_t *fence_map;
};
/* Unchecked downcast from the generic winsys context handle. */
static inline struct radv_amdgpu_ctx *
radv_amdgpu_ctx(struct radeon_winsys_ctx *base)
{
   return (struct radv_amdgpu_ctx *)base;
}
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws);
@@ -28,77 +28,77 @@
#include <errno.h>
#include "radv_private.h"
#include "util/bitset.h"
#include "radv_amdgpu_winsys.h"
#include "radv_amdgpu_surface.h"
#include "radv_amdgpu_winsys.h"
#include "radv_private.h"
#include "sid.h"
#include "ac_surface.h"
static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
const struct radeon_surf *surf)
static int
radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info, const struct radeon_surf *surf)
{
unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
if (!surf->blk_w || !surf->blk_h)
return -EINVAL;
if (!surf->blk_w || !surf->blk_h)
return -EINVAL;
switch (type) {
case RADEON_SURF_TYPE_1D:
if (surf_info->height > 1)
return -EINVAL;
/* fall through */
case RADEON_SURF_TYPE_2D:
case RADEON_SURF_TYPE_CUBEMAP:
if (surf_info->depth > 1 || surf_info->array_size > 1)
return -EINVAL;
break;
case RADEON_SURF_TYPE_3D:
if (surf_info->array_size > 1)
return -EINVAL;
break;
case RADEON_SURF_TYPE_1D_ARRAY:
if (surf_info->height > 1)
return -EINVAL;
/* fall through */
case RADEON_SURF_TYPE_2D_ARRAY:
if (surf_info->depth > 1)
return -EINVAL;
break;
default:
return -EINVAL;
}
return 0;
switch (type) {
case RADEON_SURF_TYPE_1D:
if (surf_info->height > 1)
return -EINVAL;
/* fall through */
case RADEON_SURF_TYPE_2D:
case RADEON_SURF_TYPE_CUBEMAP:
if (surf_info->depth > 1 || surf_info->array_size > 1)
return -EINVAL;
break;
case RADEON_SURF_TYPE_3D:
if (surf_info->array_size > 1)
return -EINVAL;
break;
case RADEON_SURF_TYPE_1D_ARRAY:
if (surf_info->height > 1)
return -EINVAL;
/* fall through */
case RADEON_SURF_TYPE_2D_ARRAY:
if (surf_info->depth > 1)
return -EINVAL;
break;
default:
return -EINVAL;
}
return 0;
}
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
const struct ac_surf_info *surf_info,
struct radeon_surf *surf)
static int
radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, const struct ac_surf_info *surf_info,
struct radeon_surf *surf)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
unsigned mode, type;
int r;
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
unsigned mode, type;
int r;
r = radv_amdgpu_surface_sanity(surf_info, surf);
if (r)
return r;
r = radv_amdgpu_surface_sanity(surf_info, surf);
if (r)
return r;
type = RADEON_SURF_GET(surf->flags, TYPE);
mode = RADEON_SURF_GET(surf->flags, MODE);
type = RADEON_SURF_GET(surf->flags, TYPE);
mode = RADEON_SURF_GET(surf->flags, MODE);
struct ac_surf_config config;
struct ac_surf_config config;
memcpy(&config.info, surf_info, sizeof(config.info));
config.is_1d = type == RADEON_SURF_TYPE_1D ||
type == RADEON_SURF_TYPE_1D_ARRAY;
config.is_3d = type == RADEON_SURF_TYPE_3D;
config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
memcpy(&config.info, surf_info, sizeof(config.info));
config.is_1d = type == RADEON_SURF_TYPE_1D || type == RADEON_SURF_TYPE_1D_ARRAY;
config.is_3d = type == RADEON_SURF_TYPE_3D;
config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
}
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
void
radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
{
ws->base.surface_init = radv_amdgpu_winsys_surface_init;
ws->base.surface_init = radv_amdgpu_winsys_surface_init;
}
@@ -27,6 +27,8 @@
#include <amdgpu.h>
struct radv_amdgpu_winsys;
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws);
#endif /* RADV_AMDGPU_SURFACE_H */
+180 -189
View File
@@ -25,255 +25,246 @@
* IN THE SOFTWARE.
*/
#include "radv_amdgpu_winsys.h"
#include "radv_amdgpu_winsys_public.h"
#include "radv_amdgpu_surface.h"
#include "radv_debug.h"
#include "ac_surface.h"
#include "xf86drm.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "drm-uapi/amdgpu_drm.h"
#include <assert.h>
#include "radv_amdgpu_cs.h"
#include "ac_surface.h"
#include "radv_amdgpu_bo.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_surface.h"
#include "radv_amdgpu_winsys_public.h"
#include "radv_debug.h"
#include "xf86drm.h"
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
return false;
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
return false;
if (ws->info.drm_minor < 35) {
fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
return false;
}
if (ws->info.drm_minor < 35) {
fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
return false;
}
/* LLVM 11 is required for GFX10.3. */
if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
return false;
}
/* LLVM 11 is required for GFX10.3. */
if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
return false;
}
/* LLVM 9.0 is required for GFX10. */
if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
return false;
}
/* LLVM 9.0 is required for GFX10. */
if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
return false;
}
ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
if (!ws->addrlib) {
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
return false;
}
ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
if (!ws->addrlib) {
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
return false;
}
ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
ws->use_ib_bos = ws->info.chip_class >= GFX7;
return true;
ws->use_ib_bos = ws->info.chip_class >= GFX7;
return true;
}
static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws,
struct radeon_info *info)
static void
radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
*info = ((struct radv_amdgpu_winsys *)rws)->info;
*info = ((struct radv_amdgpu_winsys *)rws)->info;
}
static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws,
enum radeon_value_id value)
static uint64_t
radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id value)
{
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
struct amdgpu_heap_info heap;
uint64_t retval = 0;
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
struct amdgpu_heap_info heap;
uint64_t retval = 0;
switch (value) {
case RADEON_ALLOCATED_VRAM:
return ws->allocated_vram;
case RADEON_ALLOCATED_VRAM_VIS:
return ws->allocated_vram_vis;
case RADEON_ALLOCATED_GTT:
return ws->allocated_gtt;
case RADEON_TIMESTAMP:
amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
return retval;
case RADEON_NUM_BYTES_MOVED:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED,
8, &retval);
return retval;
case RADEON_NUM_EVICTIONS:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS,
8, &retval);
return retval;
case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS,
8, &retval);
return retval;
case RADEON_VRAM_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
0, &heap);
return heap.heap_usage;
case RADEON_VRAM_VIS_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
&heap);
return heap.heap_usage;
case RADEON_GTT_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT,
0, &heap);
return heap.heap_usage;
case RADEON_GPU_TEMPERATURE:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP,
4, &retval);
return retval;
case RADEON_CURRENT_SCLK:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK,
4, &retval);
return retval;
case RADEON_CURRENT_MCLK:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK,
4, &retval);
return retval;
default:
unreachable("invalid query value");
}
switch (value) {
case RADEON_ALLOCATED_VRAM:
return ws->allocated_vram;
case RADEON_ALLOCATED_VRAM_VIS:
return ws->allocated_vram_vis;
case RADEON_ALLOCATED_GTT:
return ws->allocated_gtt;
case RADEON_TIMESTAMP:
amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
return retval;
case RADEON_NUM_BYTES_MOVED:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
return retval;
case RADEON_NUM_EVICTIONS:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
return retval;
case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
return retval;
case RADEON_VRAM_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
return heap.heap_usage;
case RADEON_VRAM_VIS_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
&heap);
return heap.heap_usage;
case RADEON_GTT_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
return heap.heap_usage;
case RADEON_GPU_TEMPERATURE:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
return retval;
case RADEON_CURRENT_SCLK:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
return retval;
case RADEON_CURRENT_MCLK:
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
return retval;
default:
unreachable("invalid query value");
}
return 0;
return 0;
}
static bool radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws,
unsigned reg_offset,
unsigned num_registers, uint32_t *out)
static bool
radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset,
unsigned num_registers, uint32_t *out)
{
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
0xffffffff, 0, out) == 0;
return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, 0xffffffff, 0, out) == 0;
}
static const char *radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
static const char *
radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
{
amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
return amdgpu_get_marketing_name(dev);
return amdgpu_get_marketing_name(dev);
}
static simple_mtx_t winsys_creation_mutex = _SIMPLE_MTX_INITIALIZER_NP;
static struct hash_table *winsyses = NULL;
static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
static void
radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
bool destroy = false;
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
bool destroy = false;
simple_mtx_lock(&winsys_creation_mutex);
if (!--ws->refcount) {
_mesa_hash_table_remove_key(winsyses, ws->dev);
simple_mtx_lock(&winsys_creation_mutex);
if (!--ws->refcount) {
_mesa_hash_table_remove_key(winsyses, ws->dev);
/* Clean the hashtable up if empty, though there is no
* empty function. */
if (_mesa_hash_table_num_entries(winsyses) == 0) {
_mesa_hash_table_destroy(winsyses, NULL);
winsyses = NULL;
}
/* Clean the hashtable up if empty, though there is no
* empty function. */
if (_mesa_hash_table_num_entries(winsyses) == 0) {
_mesa_hash_table_destroy(winsyses, NULL);
winsyses = NULL;
}
destroy = true;
}
simple_mtx_unlock(&winsys_creation_mutex);
if (!destroy)
return;
destroy = true;
}
simple_mtx_unlock(&winsys_creation_mutex);
if (!destroy)
return;
for (unsigned i = 0; i < ws->syncobj_count; ++i)
amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
free(ws->syncobj);
for (unsigned i = 0; i < ws->syncobj_count; ++i)
amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
free(ws->syncobj);
u_rwlock_destroy(&ws->global_bo_list.lock);
free(ws->global_bo_list.bos);
u_rwlock_destroy(&ws->global_bo_list.lock);
free(ws->global_bo_list.bos);
pthread_mutex_destroy(&ws->syncobj_lock);
u_rwlock_destroy(&ws->log_bo_list_lock);
ac_addrlib_destroy(ws->addrlib);
amdgpu_device_deinitialize(ws->dev);
FREE(rws);
pthread_mutex_destroy(&ws->syncobj_lock);
u_rwlock_destroy(&ws->log_bo_list_lock);
ac_addrlib_destroy(ws->addrlib);
amdgpu_device_deinitialize(ws->dev);
FREE(rws);
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
{
uint32_t drm_major, drm_minor, r;
amdgpu_device_handle dev;
struct radv_amdgpu_winsys *ws = NULL;
uint32_t drm_major, drm_minor, r;
amdgpu_device_handle dev;
struct radv_amdgpu_winsys *ws = NULL;
r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
if (r)
return NULL;
r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
if (r)
return NULL;
/* We have to keep this lock till insertion. */
simple_mtx_lock(&winsys_creation_mutex);
if (!winsyses)
winsyses = _mesa_pointer_hash_table_create(NULL);
if (!winsyses)
goto fail;
/* We have to keep this lock till insertion. */
simple_mtx_lock(&winsys_creation_mutex);
if (!winsyses)
winsyses = _mesa_pointer_hash_table_create(NULL);
if (!winsyses)
goto fail;
struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
if (entry) {
ws = (struct radv_amdgpu_winsys *)entry->data;
++ws->refcount;
}
struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
if (entry) {
ws = (struct radv_amdgpu_winsys *)entry->data;
++ws->refcount;
}
if (ws) {
simple_mtx_unlock(&winsys_creation_mutex);
amdgpu_device_deinitialize(dev);
return &ws->base;
}
if (ws) {
simple_mtx_unlock(&winsys_creation_mutex);
amdgpu_device_deinitialize(dev);
return &ws->base;
}
ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
if (!ws)
goto fail;
ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
if (!ws)
goto fail;
ws->refcount = 1;
ws->dev = dev;
ws->info.drm_major = drm_major;
ws->info.drm_minor = drm_minor;
if (!do_winsys_init(ws, fd))
goto winsys_fail;
ws->refcount = 1;
ws->dev = dev;
ws->info.drm_major = drm_major;
ws->info.drm_minor = drm_minor;
if (!do_winsys_init(ws, fd))
goto winsys_fail;
ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
if (debug_flags & RADV_DEBUG_NO_IBS)
ws->use_ib_bos = false;
ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
if (debug_flags & RADV_DEBUG_NO_IBS)
ws->use_ib_bos = false;
ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
u_rwlock_init(&ws->global_bo_list.lock);
list_inithead(&ws->log_bo_list);
u_rwlock_init(&ws->log_bo_list_lock);
pthread_mutex_init(&ws->syncobj_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;
ws->base.query_value = radv_amdgpu_winsys_query_value;
ws->base.read_registers = radv_amdgpu_winsys_read_registers;
ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
ws->base.destroy = radv_amdgpu_winsys_destroy;
radv_amdgpu_bo_init_functions(ws);
radv_amdgpu_cs_init_functions(ws);
radv_amdgpu_surface_init_functions(ws);
ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
u_rwlock_init(&ws->global_bo_list.lock);
list_inithead(&ws->log_bo_list);
u_rwlock_init(&ws->log_bo_list_lock);
pthread_mutex_init(&ws->syncobj_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;
ws->base.query_value = radv_amdgpu_winsys_query_value;
ws->base.read_registers = radv_amdgpu_winsys_read_registers;
ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
ws->base.destroy = radv_amdgpu_winsys_destroy;
radv_amdgpu_bo_init_functions(ws);
radv_amdgpu_cs_init_functions(ws);
radv_amdgpu_surface_init_functions(ws);
_mesa_hash_table_insert(winsyses, dev, ws);
simple_mtx_unlock(&winsys_creation_mutex);
_mesa_hash_table_insert(winsyses, dev, ws);
simple_mtx_unlock(&winsys_creation_mutex);
return &ws->base;
return &ws->base;
winsys_fail:
free(ws);
free(ws);
fail:
if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
_mesa_hash_table_destroy(winsyses, NULL);
winsyses = NULL;
}
simple_mtx_unlock(&winsys_creation_mutex);
amdgpu_device_deinitialize(dev);
return NULL;
if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
_mesa_hash_table_destroy(winsyses, NULL);
winsyses = NULL;
}
simple_mtx_unlock(&winsys_creation_mutex);
amdgpu_device_deinitialize(dev);
return NULL;
}
@@ -28,57 +28,57 @@
#ifndef RADV_AMDGPU_WINSYS_H
#define RADV_AMDGPU_WINSYS_H
#include "radv_radeon_winsys.h"
#include "ac_gpu_info.h"
#include <amdgpu.h>
#include <pthread.h>
#include "util/list.h"
#include "util/rwlock.h"
#include <pthread.h>
#include "ac_gpu_info.h"
#include "radv_radeon_winsys.h"
/*
 * amdgpu implementation of the radeon_winsys interface.
 *
 * `base` must stay the first member: radv_amdgpu_winsys() and several
 * callbacks convert a `struct radeon_winsys *` to this type by pointer cast.
 */
struct radv_amdgpu_winsys {
   struct radeon_winsys base;
   amdgpu_device_handle dev;

   struct radeon_info info;
   struct amdgpu_gpu_info amdinfo;
   struct ac_addrlib *addrlib;

   /* Behaviour switches. */
   bool debug_all_bos;
   bool debug_log_bos;
   bool use_ib_bos;
   enum radeon_bo_domain cs_bo_domain;
   bool zero_all_vram_allocs;
   bool use_local_bos;
   bool use_llvm;

   /* Allocation counters reported through the RADEON_ALLOCATED_* queries. */
   uint64_t allocated_vram;
   uint64_t allocated_vram_vis;
   uint64_t allocated_gtt;

   /* Global BO list */
   struct {
      struct radv_amdgpu_winsys_bo **bos;
      uint32_t count;
      uint32_t capacity;
      struct u_rwlock lock;
   } global_bo_list;

   /* syncobj cache */
   pthread_mutex_t syncobj_lock;
   uint32_t *syncobj;
   uint32_t syncobj_count, syncobj_capacity;

   /* BO log */
   struct u_rwlock log_bo_list_lock;
   struct list_head log_bo_list;

   /* Number of outstanding create() references to this winsys. */
   uint32_t refcount;
};
/* Unchecked downcast from the generic winsys pointer. */
static inline struct radv_amdgpu_winsys *
radv_amdgpu_winsys(struct radeon_winsys *base)
{
   return (struct radv_amdgpu_winsys *)base;
}
#endif /* RADV_AMDGPU_WINSYS_H */
@@ -30,7 +30,7 @@
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
uint64_t perftest_flags);
uint64_t perftest_flags);
struct radeon_winsys *radv_dummy_winsys_create(void);
+26 -28
View File
@@ -29,34 +29,31 @@
#include "util/u_memory.h"
static struct radeon_winsys_bo *
radv_null_winsys_bo_create(struct radeon_winsys *_ws,
uint64_t size,
unsigned alignment,
enum radeon_bo_domain initial_domain,
enum radeon_bo_flag flags,
unsigned priority)
radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
unsigned priority)
{
struct radv_null_winsys_bo *bo;
struct radv_null_winsys_bo *bo;
bo = CALLOC_STRUCT(radv_null_winsys_bo);
if (!bo)
return NULL;
bo = CALLOC_STRUCT(radv_null_winsys_bo);
if (!bo)
return NULL;
bo->ptr = malloc(size);
if (!bo->ptr)
goto error_ptr_alloc;
bo->ptr = malloc(size);
if (!bo->ptr)
goto error_ptr_alloc;
return (struct radeon_winsys_bo *)bo;
return (struct radeon_winsys_bo *)bo;
error_ptr_alloc:
FREE(bo);
return NULL;
FREE(bo);
return NULL;
}
/* radeon_winsys::buffer_map: the BO is host memory, so return it directly. */
static void *
radv_null_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
   return bo->ptr;
}
static void
@@ -64,18 +61,19 @@ radv_null_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
}
static void radv_null_winsys_bo_destroy(struct radeon_winsys *_ws,
struct radeon_winsys_bo *_bo)
static void
radv_null_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
FREE(bo->ptr);
FREE(bo);
struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
FREE(bo->ptr);
FREE(bo);
}
void radv_null_bo_init_functions(struct radv_null_winsys *ws)
void
radv_null_bo_init_functions(struct radv_null_winsys *ws)
{
ws->base.buffer_create = radv_null_winsys_bo_create;
ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
ws->base.buffer_map = radv_null_winsys_bo_map;
ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
ws->base.buffer_create = radv_null_winsys_bo_create;
ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
ws->base.buffer_map = radv_null_winsys_bo_map;
ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
}
+6 -6
View File
@@ -31,15 +31,15 @@
#include "radv_null_winsys.h"
/*
 * Null winsys buffer object. `base` must stay the first member because
 * radv_null_winsys_bo() converts by pointer cast. `ptr` is the host
 * allocation that stands in for GPU memory.
 */
struct radv_null_winsys_bo {
   struct radeon_winsys_bo base;
   struct radv_null_winsys *ws;
   void *ptr;
};
/* Unchecked downcast from the generic winsys BO pointer. */
static inline struct radv_null_winsys_bo *
radv_null_winsys_bo(struct radeon_winsys_bo *bo)
{
   return (struct radv_null_winsys_bo *)bo;
}
void radv_null_bo_init_functions(struct radv_null_winsys *ws);
+42 -40
View File
@@ -29,73 +29,75 @@
#include "util/u_memory.h"
/*
 * Null winsys command stream. `base` must stay the first member because
 * radv_null_cs() converts by pointer cast.
 */
struct radv_null_cs {
   struct radeon_cmdbuf base;
   struct radv_null_winsys *ws;
};
/* Unchecked downcast from the generic command buffer pointer. */
static inline struct radv_null_cs *
radv_null_cs(struct radeon_cmdbuf *base)
{
   return (struct radv_null_cs *)base;
}
static VkResult radv_null_ctx_create(struct radeon_winsys *_ws,
enum radeon_ctx_priority priority,
struct radeon_winsys_ctx **rctx)
static VkResult
radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
struct radeon_winsys_ctx **rctx)
{
struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
if (!ctx)
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (!ctx)
return VK_ERROR_OUT_OF_HOST_MEMORY;
*rctx = (struct radeon_winsys_ctx *)ctx;
return VK_SUCCESS;
*rctx = (struct radeon_winsys_ctx *)ctx;
return VK_SUCCESS;
}
/* radeon_winsys::ctx_destroy: release a context made by radv_null_ctx_create(). */
static void
radv_null_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
   struct radv_null_ctx *ctx = (struct radv_null_ctx *)rwctx;
   FREE(ctx);
}
static struct radeon_cmdbuf *
radv_null_cs_create(struct radeon_winsys *ws,
enum ring_type ring_type)
radv_null_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
{
struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
if (!cs)
return NULL;
struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
if (!cs)
return NULL;
cs->ws = radv_null_winsys(ws);
cs->ws = radv_null_winsys(ws);
cs->base.buf = malloc(16384);
cs->base.max_dw = 4096;
if (!cs->base.buf) {
FREE(cs);
return NULL;
}
cs->base.buf = malloc(16384);
cs->base.max_dw = 4096;
if (!cs->base.buf) {
FREE(cs);
return NULL;
}
return &cs->base;
return &cs->base;
}
static VkResult radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
static VkResult
radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
{
return VK_SUCCESS;
return VK_SUCCESS;
}
static void radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
static void
radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
{
struct radv_null_cs *cs = radv_null_cs(rcs);
FREE(cs->base.buf);
FREE(cs);
struct radv_null_cs *cs = radv_null_cs(rcs);
FREE(cs->base.buf);
FREE(cs);
}
void radv_null_cs_init_functions(struct radv_null_winsys *ws)
void
radv_null_cs_init_functions(struct radv_null_winsys *ws)
{
ws->base.ctx_create = radv_null_ctx_create;
ws->base.ctx_destroy = radv_null_ctx_destroy;
ws->base.cs_create = radv_null_cs_create;
ws->base.cs_finalize = radv_null_cs_finalize;
ws->base.cs_destroy = radv_null_cs_destroy;
ws->base.ctx_create = radv_null_ctx_create;
ws->base.ctx_destroy = radv_null_ctx_destroy;
ws->base.cs_create = radv_null_cs_create;
ws->base.cs_finalize = radv_null_cs_finalize;
ws->base.cs_destroy = radv_null_cs_destroy;
}
+3 -3
View File
@@ -28,17 +28,17 @@
#ifndef RADV_NULL_CS_H
#define RADV_NULL_CS_H
#include "radv_radeon_winsys.h"
#include "radv_null_winsys.h"
#include "radv_radeon_winsys.h"
/* Null winsys submission context; carries only a back-pointer to its winsys. */
struct radv_null_ctx {
   struct radv_null_winsys *ws;
};
/* Unchecked downcast from the generic winsys context handle. */
static inline struct radv_null_ctx *
radv_null_ctx(struct radeon_winsys_ctx *base)
{
   return (struct radv_null_ctx *)base;
}
void radv_null_cs_init_functions(struct radv_null_winsys *ws);
+103 -101
View File
@@ -33,129 +33,131 @@
/* Hardcode some GPU info that are needed for the driver or for some tools. */
static const struct {
uint32_t pci_id;
uint32_t num_render_backends;
bool has_dedicated_vram;
uint32_t pci_id;
uint32_t num_render_backends;
bool has_dedicated_vram;
} gpu_info[] = {
[CHIP_TAHITI] = { 0x6780, 8, true },
[CHIP_PITCAIRN] = { 0x6800, 8, true },
[CHIP_VERDE] = { 0x6820, 4, true },
[CHIP_OLAND] = { 0x6060, 2, true },
[CHIP_HAINAN] = { 0x6660, 2, true },
[CHIP_BONAIRE] = { 0x6640, 4, true },
[CHIP_KAVERI] = { 0x1304, 2, false },
[CHIP_KABINI] = { 0x9830, 2, false },
[CHIP_HAWAII] = { 0x67A0, 16, true },
[CHIP_TONGA] = { 0x6920, 8, true },
[CHIP_ICELAND] = { 0x6900, 2, true },
[CHIP_CARRIZO] = { 0x9870, 2, false },
[CHIP_FIJI] = { 0x7300, 16, true },
[CHIP_STONEY] = { 0x98E4, 2, false },
[CHIP_POLARIS10] = { 0x67C0, 8, true },
[CHIP_POLARIS11] = { 0x67E0, 4, true },
[CHIP_POLARIS12] = { 0x6980, 4, true },
[CHIP_VEGAM] = { 0x694C, 4, true },
[CHIP_VEGA10] = { 0x6860, 16, true },
[CHIP_VEGA12] = { 0x69A0, 8, true },
[CHIP_VEGA20] = { 0x66A0, 16, true },
[CHIP_RAVEN] = { 0x15DD, 2, false },
[CHIP_RENOIR] = { 0x1636, 2, false },
[CHIP_ARCTURUS] = { 0x738C, 2, true },
[CHIP_NAVI10] = { 0x7310, 16, true },
[CHIP_NAVI12] = { 0x7360, 8, true },
[CHIP_NAVI14] = { 0x7340, 8, true },
[CHIP_SIENNA_CICHLID] = { 0x73A0, 8, true },
[CHIP_VANGOGH] = { 0x163F, 8, false },
[CHIP_NAVY_FLOUNDER] = { 0x73C0, 8, true },
[CHIP_DIMGREY_CAVEFISH] = { 0x73E0, 8, true },
[CHIP_TAHITI] = {0x6780, 8, true},
[CHIP_PITCAIRN] = {0x6800, 8, true},
[CHIP_VERDE] = {0x6820, 4, true},
[CHIP_OLAND] = {0x6060, 2, true},
[CHIP_HAINAN] = {0x6660, 2, true},
[CHIP_BONAIRE] = {0x6640, 4, true},
[CHIP_KAVERI] = {0x1304, 2, false},
[CHIP_KABINI] = {0x9830, 2, false},
[CHIP_HAWAII] = {0x67A0, 16, true},
[CHIP_TONGA] = {0x6920, 8, true},
[CHIP_ICELAND] = {0x6900, 2, true},
[CHIP_CARRIZO] = {0x9870, 2, false},
[CHIP_FIJI] = {0x7300, 16, true},
[CHIP_STONEY] = {0x98E4, 2, false},
[CHIP_POLARIS10] = {0x67C0, 8, true},
[CHIP_POLARIS11] = {0x67E0, 4, true},
[CHIP_POLARIS12] = {0x6980, 4, true},
[CHIP_VEGAM] = {0x694C, 4, true},
[CHIP_VEGA10] = {0x6860, 16, true},
[CHIP_VEGA12] = {0x69A0, 8, true},
[CHIP_VEGA20] = {0x66A0, 16, true},
[CHIP_RAVEN] = {0x15DD, 2, false},
[CHIP_RENOIR] = {0x1636, 2, false},
[CHIP_ARCTURUS] = {0x738C, 2, true},
[CHIP_NAVI10] = {0x7310, 16, true},
[CHIP_NAVI12] = {0x7360, 8, true},
[CHIP_NAVI14] = {0x7340, 8, true},
[CHIP_SIENNA_CICHLID] = {0x73A0, 8, true},
[CHIP_VANGOGH] = {0x163F, 8, false},
[CHIP_NAVY_FLOUNDER] = {0x73C0, 8, true},
[CHIP_DIMGREY_CAVEFISH] = {0x73E0, 8, true},
};
static void radv_null_winsys_query_info(struct radeon_winsys *rws,
struct radeon_info *info)
static void
radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
const char *family = getenv("RADV_FORCE_FAMILY");
unsigned i;
const char *family = getenv("RADV_FORCE_FAMILY");
unsigned i;
info->chip_class = CLASS_UNKNOWN;
info->family = CHIP_UNKNOWN;
info->chip_class = CLASS_UNKNOWN;
info->family = CHIP_UNKNOWN;
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
if (!strcmp(family, ac_get_family_name(i))) {
/* Override family and chip_class. */
info->family = i;
info->name = "OVERRIDDEN";
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
if (!strcmp(family, ac_get_family_name(i))) {
/* Override family and chip_class. */
info->family = i;
info->name = "OVERRIDDEN";
if (i >= CHIP_SIENNA_CICHLID)
info->chip_class = GFX10_3;
else if (i >= CHIP_NAVI10)
info->chip_class = GFX10;
else if (i >= CHIP_VEGA10)
info->chip_class = GFX9;
else if (i >= CHIP_TONGA)
info->chip_class = GFX8;
else if (i >= CHIP_BONAIRE)
info->chip_class = GFX7;
else
info->chip_class = GFX6;
}
}
if (i >= CHIP_SIENNA_CICHLID)
info->chip_class = GFX10_3;
else if (i >= CHIP_NAVI10)
info->chip_class = GFX10;
else if (i >= CHIP_VEGA10)
info->chip_class = GFX9;
else if (i >= CHIP_TONGA)
info->chip_class = GFX8;
else if (i >= CHIP_BONAIRE)
info->chip_class = GFX7;
else
info->chip_class = GFX6;
}
}
if (info->family == CHIP_UNKNOWN) {
fprintf(stderr, "radv: Unknown family: %s\n", family);
abort();
}
if (info->family == CHIP_UNKNOWN) {
fprintf(stderr, "radv: Unknown family: %s\n", family);
abort();
}
info->pci_id = gpu_info[info->family].pci_id;
info->max_se = 4;
info->num_se = 4;
if (info->chip_class >= GFX10_3)
info->max_wave64_per_simd = 16;
else if (info->chip_class >= GFX10)
info->max_wave64_per_simd = 20;
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
info->max_wave64_per_simd = 8;
else
info->max_wave64_per_simd = 10;
info->pci_id = gpu_info[info->family].pci_id;
info->max_se = 4;
info->num_se = 4;
if (info->chip_class >= GFX10_3)
info->max_wave64_per_simd = 16;
else if (info->chip_class >= GFX10)
info->max_wave64_per_simd = 20;
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
info->max_wave64_per_simd = 8;
else
info->max_wave64_per_simd = 10;
if (info->chip_class >= GFX10)
info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
else if (info->chip_class >= GFX8)
info->num_physical_sgprs_per_simd = 800;
else
info->num_physical_sgprs_per_simd = 512;
if (info->chip_class >= GFX10)
info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
else if (info->chip_class >= GFX8)
info->num_physical_sgprs_per_simd = 800;
else
info->num_physical_sgprs_per_simd = 512;
info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
info->max_render_backends = gpu_info[info->family].num_render_backends;
info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
info->lds_alloc_granularity =
info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
info->max_render_backends = gpu_info[info->family].num_render_backends;
info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
info->has_packed_math_16bit = info->chip_class >= GFX9;
info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
info->has_packed_math_16bit = info->chip_class >= GFX9;
info->has_image_load_dcc_bug = info->family == CHIP_DIMGREY_CAVEFISH ||
info->family == CHIP_VANGOGH;
info->has_image_load_dcc_bug =
info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_VANGOGH;
}
/* radeon_winsys::destroy: release the winsys made by radv_null_winsys_create(). */
static void
radv_null_winsys_destroy(struct radeon_winsys *rws)
{
   FREE(rws);
}
/*
 * Create a null winsys: a zero-initialised radeon_winsys implementation with
 * the destroy, query_info, buffer and command-stream callbacks installed.
 *
 * Returns NULL if the allocation fails.
 */
struct radeon_winsys *
radv_null_winsys_create(void)
{
   struct radv_null_winsys *ws;

   ws = calloc(1, sizeof(struct radv_null_winsys));
   if (!ws)
      return NULL;

   ws->base.destroy = radv_null_winsys_destroy;
   ws->base.query_info = radv_null_winsys_query_info;
   radv_null_bo_init_functions(ws);
   radv_null_cs_init_functions(ws);

   return &ws->base;
}
@@ -28,18 +28,18 @@
#ifndef RADV_NULL_WINSYS_H
#define RADV_NULL_WINSYS_H
#include "radv_radeon_winsys.h"
#include "ac_gpu_info.h"
#include "util/list.h"
#include "ac_gpu_info.h"
#include "radv_radeon_winsys.h"
/*
 * Null implementation of the radeon_winsys interface. It holds no state
 * beyond the base vtable; radv_null_winsys() converts by pointer cast, so
 * `base` must stay the first member.
 */
struct radv_null_winsys {
   struct radeon_winsys base;
};
/* Unchecked downcast from the generic winsys pointer. */
static inline struct radv_null_winsys *
radv_null_winsys(struct radeon_winsys *base)
{
   return (struct radv_null_winsys *)base;
}
#endif /* RADV_NULL_WINSYS_H */