anv: Move invariant state to small initial batch

We use the simple batch helper to submit a batch at driver startup time
which holds all the state that never changes.  We don't have a whole lot
and once we enable tessellation there'll be even less. Even so, it's a
simple mechanism and reduces our steady state batch sizes a bit.
This commit is contained in:
Kristian Høgsberg Kristensen
2016-02-05 16:11:12 -08:00
parent c9c3344c4f
commit 6cdada0360
6 changed files with 158 additions and 94 deletions
+23 -4
View File
@@ -673,8 +673,9 @@ anv_device_init_border_colors(struct anv_device *device)
border_colors);
}
static VkResult
submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
VkResult
anv_device_submit_simple_batch(struct anv_device *device,
struct anv_batch *batch)
{
struct anv_state state;
struct drm_i915_gem_execbuffer2 execbuf;
@@ -685,6 +686,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
int64_t timeout;
int ret;
/* Kernel driver requires 8 byte aligned batch length */
size = align_u32(batch->next - batch->start, 8);
state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32);
bo = &device->dynamic_state_pool.block_pool->bo;
@@ -702,7 +704,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
execbuf.buffers_ptr = (uintptr_t) exec2_objects;
execbuf.buffer_count = 1;
execbuf.batch_start_offset = state.offset;
execbuf.batch_len = batch->next - state.map;
execbuf.batch_len = size;
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
@@ -814,6 +816,23 @@ VkResult anv_CreateDevice(
anv_queue_init(device, &device->queue);
switch (device->info.gen) {
case 7:
if (!device->info.is_haswell)
result = gen7_init_device_state(device);
else
result = gen75_init_device_state(device);
break;
case 8:
result = gen8_init_device_state(device);
break;
case 9:
result = gen9_init_device_state(device);
break;
}
if (result != VK_SUCCESS)
goto fail_fd;
result = anv_device_init_meta(device);
if (result != VK_SUCCESS)
goto fail_fd;
@@ -1006,7 +1025,7 @@ VkResult anv_DeviceWaitIdle(
anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
anv_batch_emit(&batch, GEN7_MI_NOOP);
return submit_simple_batch(device, &batch);
return anv_device_submit_simple_batch(device, &batch);
}
VkResult
+7
View File
@@ -662,6 +662,11 @@ struct anv_device {
pthread_mutex_t mutex;
};
/* Per-hardware-generation entry points that submit the batch of invariant
 * device state at startup. anv_CreateDevice() calls exactly one of them,
 * chosen from device->info.gen (and is_haswell for gen 7), and fails device
 * creation if the result is not VK_SUCCESS.
 */
VkResult gen7_init_device_state(struct anv_device *device);
VkResult gen75_init_device_state(struct anv_device *device);
VkResult gen8_init_device_state(struct anv_device *device);
VkResult gen9_init_device_state(struct anv_device *device);
void* anv_gem_mmap(struct anv_device *device,
uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(void *p, uint64_t size);
@@ -738,6 +743,8 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
void *location, struct anv_bo *bo, uint32_t offset);
/* Submit the commands recorded between batch->start and batch->next as a
 * stand-alone batch on the device. Used by anv_DeviceWaitIdle() and by the
 * per-gen init_device_state() helpers.
 */
VkResult anv_device_submit_simple_batch(struct anv_device *device,
struct anv_batch *batch);
struct anv_address {
struct anv_bo *bo;
-9
View File
@@ -214,17 +214,8 @@ genX(graphics_pipeline_create)(
gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS,
.StatisticsEnable = true);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false);
emit_urb_setup(pipeline);
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS);
const VkPipelineRasterizationStateCreateInfo *rs_info =
pCreateInfo->pRasterizationState;
+26
View File
@@ -34,6 +34,32 @@
#include "genX_state_util.h"
VkResult
genX(init_device_state)(struct anv_device *device)
{
struct anv_batch batch;
uint32_t cmds[64];
batch.start = batch.next = cmds;
batch.end = (void *) cmds + sizeof(cmds);
anv_batch_emit(&batch, GEN7_PIPELINE_SELECT,
.PipelineSelection = GPGPU);
anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
.StatisticsEnable = true);
anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false);
anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false);
anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false);
anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS);
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));
assert(batch.next <= batch.end);
return anv_device_submit_simple_batch(device, &batch);
}
GENX_FUNC(GEN7, GEN75) void
genX(fill_buffer_surface_state)(void *state, enum isl_format format,
uint32_t offset, uint32_t range,
-81
View File
@@ -247,76 +247,6 @@ emit_ms_state(struct anv_pipeline *pipeline,
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
.SampleMask = sample_mask);
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
* VkPhysicalDeviceFeatures::standardSampleLocations.
*/
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_PATTERN),
._1xSample0XOffset = 0.5,
._1xSample0YOffset = 0.5,
._2xSample0XOffset = 0.25,
._2xSample0YOffset = 0.25,
._2xSample1XOffset = 0.75,
._2xSample1YOffset = 0.75,
._4xSample0XOffset = 0.375,
._4xSample0YOffset = 0.125,
._4xSample1XOffset = 0.875,
._4xSample1YOffset = 0.375,
._4xSample2XOffset = 0.125,
._4xSample2YOffset = 0.625,
._4xSample3XOffset = 0.625,
._4xSample3YOffset = 0.875,
._8xSample0XOffset = 0.5625,
._8xSample0YOffset = 0.3125,
._8xSample1XOffset = 0.4375,
._8xSample1YOffset = 0.6875,
._8xSample2XOffset = 0.8125,
._8xSample2YOffset = 0.5625,
._8xSample3XOffset = 0.3125,
._8xSample3YOffset = 0.1875,
._8xSample4XOffset = 0.1875,
._8xSample4YOffset = 0.8125,
._8xSample5XOffset = 0.0625,
._8xSample5YOffset = 0.4375,
._8xSample6XOffset = 0.6875,
._8xSample6YOffset = 0.9375,
._8xSample7XOffset = 0.9375,
._8xSample7YOffset = 0.0625,
#if ANV_GEN >= 9
._16xSample0XOffset = 0.5625,
._16xSample0YOffset = 0.5625,
._16xSample1XOffset = 0.4375,
._16xSample1YOffset = 0.3125,
._16xSample2XOffset = 0.3125,
._16xSample2YOffset = 0.6250,
._16xSample3XOffset = 0.7500,
._16xSample3YOffset = 0.4375,
._16xSample4XOffset = 0.1875,
._16xSample4YOffset = 0.3750,
._16xSample5XOffset = 0.6250,
._16xSample5YOffset = 0.8125,
._16xSample6XOffset = 0.8125,
._16xSample6YOffset = 0.6875,
._16xSample7XOffset = 0.6875,
._16xSample7YOffset = 0.1875,
._16xSample8XOffset = 0.3750,
._16xSample8YOffset = 0.8750,
._16xSample9XOffset = 0.5000,
._16xSample9YOffset = 0.0625,
._16xSample10XOffset = 0.2500,
._16xSample10YOffset = 0.1250,
._16xSample11XOffset = 0.1250,
._16xSample11YOffset = 0.7500,
._16xSample12XOffset = 0.0000,
._16xSample12YOffset = 0.5000,
._16xSample13XOffset = 0.9375,
._16xSample13YOffset = 0.2500,
._16xSample14XOffset = 0.8750,
._16xSample14YOffset = 0.9375,
._16xSample15XOffset = 0.0625,
._16xSample15YOffset = 0.0000,
#endif
);
}
VkResult
@@ -359,19 +289,8 @@ genX(graphics_pipeline_create)(
emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS),
.StatisticsEnable = true);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), .Enable = false);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), .TEEnable = false);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = false);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
emit_urb_setup(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY),
.ChromaKeyKillEnable = false);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_AA_LINE_PARAMETERS));
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
.ClipEnable = true,
.ViewportXYClipTestEnable = !(extra && extra->disable_viewport),
+102
View File
@@ -34,6 +34,108 @@
#include "genX_state_util.h"
/**
 * Emit the gen8+ state that stays constant for the lifetime of the device
 * into a stack-allocated batch and submit it once through
 * anv_device_submit_simple_batch().
 *
 * \param device  device to program
 * \return        the VkResult of anv_device_submit_simple_batch()
 */
VkResult
genX(init_device_state)(struct anv_device *device)
{
   uint32_t dwords[64];
   struct anv_batch batch;

   /* Empty batch covering the whole stack buffer. */
   batch.start = dwords;
   batch.next = dwords;
   batch.end = (void *) ((char *) dwords + sizeof(dwords));

   anv_batch_emit(&batch, GENX(PIPELINE_SELECT),
#if ANV_GEN >= 9
                  .MaskBits = 3,
#endif
                  .PipelineSelection = _3D);

   anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
                  .StatisticsEnable = true);

   /* HS, TE, DS and stream-out are all left disabled. */
   anv_batch_emit(&batch, GENX(3DSTATE_HS),
                  .Enable = false);
   anv_batch_emit(&batch, GENX(3DSTATE_TE),
                  .TEEnable = false);
   anv_batch_emit(&batch, GENX(3DSTATE_DS),
                  .FunctionEnable = false);
   anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT),
                  .SOFunctionEnable = false);

   anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY),
                  .ChromaKeyKillEnable = false);
   anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS));

   /* Sample positions follow Table 24.1 "Standard sample locations" of the
    * Vulkan 1.0 spec; see VkPhysicalDeviceFeatures::standardSampleLocations.
    */
   anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN),
                  /* 1x */
                  ._1xSample0XOffset = 0.5, ._1xSample0YOffset = 0.5,

                  /* 2x */
                  ._2xSample0XOffset = 0.25, ._2xSample0YOffset = 0.25,
                  ._2xSample1XOffset = 0.75, ._2xSample1YOffset = 0.75,

                  /* 4x */
                  ._4xSample0XOffset = 0.375, ._4xSample0YOffset = 0.125,
                  ._4xSample1XOffset = 0.875, ._4xSample1YOffset = 0.375,
                  ._4xSample2XOffset = 0.125, ._4xSample2YOffset = 0.625,
                  ._4xSample3XOffset = 0.625, ._4xSample3YOffset = 0.875,

                  /* 8x */
                  ._8xSample0XOffset = 0.5625, ._8xSample0YOffset = 0.3125,
                  ._8xSample1XOffset = 0.4375, ._8xSample1YOffset = 0.6875,
                  ._8xSample2XOffset = 0.8125, ._8xSample2YOffset = 0.5625,
                  ._8xSample3XOffset = 0.3125, ._8xSample3YOffset = 0.1875,
                  ._8xSample4XOffset = 0.1875, ._8xSample4YOffset = 0.8125,
                  ._8xSample5XOffset = 0.0625, ._8xSample5YOffset = 0.4375,
                  ._8xSample6XOffset = 0.6875, ._8xSample6YOffset = 0.9375,
                  ._8xSample7XOffset = 0.9375, ._8xSample7YOffset = 0.0625,
#if ANV_GEN >= 9
                  /* 16x */
                  ._16xSample0XOffset = 0.5625, ._16xSample0YOffset = 0.5625,
                  ._16xSample1XOffset = 0.4375, ._16xSample1YOffset = 0.3125,
                  ._16xSample2XOffset = 0.3125, ._16xSample2YOffset = 0.6250,
                  ._16xSample3XOffset = 0.7500, ._16xSample3YOffset = 0.4375,
                  ._16xSample4XOffset = 0.1875, ._16xSample4YOffset = 0.3750,
                  ._16xSample5XOffset = 0.6250, ._16xSample5YOffset = 0.8125,
                  ._16xSample6XOffset = 0.8125, ._16xSample6YOffset = 0.6875,
                  ._16xSample7XOffset = 0.6875, ._16xSample7YOffset = 0.1875,
                  ._16xSample8XOffset = 0.3750, ._16xSample8YOffset = 0.8750,
                  ._16xSample9XOffset = 0.5000, ._16xSample9YOffset = 0.0625,
                  ._16xSample10XOffset = 0.2500, ._16xSample10YOffset = 0.1250,
                  ._16xSample11XOffset = 0.1250, ._16xSample11YOffset = 0.7500,
                  ._16xSample12XOffset = 0.0000, ._16xSample12YOffset = 0.5000,
                  ._16xSample13XOffset = 0.9375, ._16xSample13YOffset = 0.2500,
                  ._16xSample14XOffset = 0.8750, ._16xSample14YOffset = 0.9375,
                  ._16xSample15XOffset = 0.0625, ._16xSample15YOffset = 0.0000,
#endif
                  );

   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));

   /* Everything above must have fit in the stack buffer. */
   assert(batch.next <= batch.end);

   return anv_device_submit_simple_batch(device, &batch);
}
static const uint32_t
isl_to_gen_multisample_layout[] = {
[ISL_MSAA_LAYOUT_NONE] = MSS,