diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index c39ac11576a..6196da9325f 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -130,9 +130,6 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, res->flags |= RADEON_FLAG_DISCARDABLE; } - /* Set expected VRAM and GART usage for the buffer. */ - res->memory_usage_kb = MAX2(1, size / 1024); - if (res->domains & RADEON_DOMAIN_VRAM) { /* We don't want to evict buffers from VRAM by mapping them for CPU access, * because they might never be moved back again. If a buffer is large enough, @@ -272,7 +269,6 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *d sdst->b.b.bind = ssrc->b.b.bind; sdst->flags = ssrc->flags; - assert(sdst->memory_usage_kb == ssrc->memory_usage_kb); assert(sdst->bo_size == ssrc->bo_size); assert(sdst->bo_alignment_log2 == ssrc->bo_alignment_log2); assert(sdst->domains == ssrc->domains); @@ -633,7 +629,6 @@ static struct pipe_resource *si_buffer_from_user_memory(struct pipe_screen *scre } buf->gpu_address = ws->buffer_get_virtual_address(buf->buf); - buf->memory_usage_kb = templ->width0 / 1024; return &buf->b.b; } diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 203b1351cf4..a69703e8c9b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -972,13 +972,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info gfx11_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); } - /* Add buffer sizes for memory checking in need_cs_space. */ - si_context_add_resource_size(sctx, &program->shader.bo->b.b); - /* TODO: add the scratch buffer */ - if (info->indirect) { - si_context_add_resource_size(sctx, info->indirect); - /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (sctx->gfx_level <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WB_L2; diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index fa605d4234f..916f2d9e3ae 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -129,12 +129,6 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst uint64_t remaining_size, unsigned user_flags, enum si_coherency coher, bool *is_first, unsigned *packet_flags) { - /* Count memory usage in so that need_cs_space can take it into account. */ - if (dst) - si_context_add_resource_size(sctx, dst); - if (src) - si_context_add_resource_size(sctx, src); - if (!(user_flags & SI_OP_CPDMA_SKIP_CHECK_CS_SPACE)) si_need_gfx_cs_space(sctx, 0); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 68615386717..1bed36fa424 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -211,7 +211,7 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_reso tex = tex->flushed_depth_texture; priority = si_get_sampler_view_priority(&tex->buffer); - radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage | priority, check_mem); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, usage | priority); } static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers) @@ -1251,8 +1251,8 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res buffers->buffers[slot] = buffer; buffers->offsets[slot] = buffer_offset; - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), - RADEON_USAGE_READ | buffers->priority_constbuf, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), + RADEON_USAGE_READ | buffers->priority_constbuf); buffers->enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */ @@ -1396,8 +1396,8 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resou pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); buffers->offsets[slot] = sbuffer->buffer_offset; - radeon_add_to_gfx_buffer_list_check_mem( - sctx, buf, (writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buf, + (writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority); if (writable) buffers->writable_mask |= 1llu << slot; else @@ -1673,10 +1673,9 @@ static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_ si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); sctx->descriptors_dirty |= 1u << descriptors_idx; - radeon_add_to_gfx_buffer_list_check_mem( - sctx, si_resource(buffer), - (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | - priority, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), + (buffers->writable_mask & (1llu << i) ? + RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority); noop = false; } } @@ -1709,9 +1708,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) for (unsigned i = 0; i < ARRAY_SIZE(sctx->vertex_buffer); i++) { struct si_resource *buf = si_resource(sctx->vertex_buffer[i].buffer.resource); if (buf) { - radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, - RADEON_USAGE_READ | - RADEON_PRIO_VERTEX_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buf, + RADEON_USAGE_READ | + RADEON_PRIO_VERTEX_BUFFER); } } } else if (buffer->bind_history & SI_BIND_VERTEX_BUFFER) { @@ -1725,9 +1724,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) if (sctx->vertex_buffer[vb].buffer.resource == buf) { sctx->vertex_buffers_dirty = num_elems > 0; - radeon_add_to_gfx_buffer_list_check_mem(sctx, buffer, - RADEON_USAGE_READ | - RADEON_PRIO_VERTEX_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buffer, + RADEON_USAGE_READ | + RADEON_PRIO_VERTEX_BUFFER); break; } } @@ -1746,8 +1745,8 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_WRITE | - RADEON_PRIO_SHADER_RW_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_WRITE | + RADEON_PRIO_SHADER_RW_BUFFER); /* Update the streamout state. */ if (sctx->streamout.begin_emitted) @@ -1803,8 +1802,8 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) descs->list + desc_slot * 16 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | - RADEON_PRIO_SAMPLER_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READ | + RADEON_PRIO_SAMPLER_BUFFER); } } } @@ -1833,9 +1832,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) descs->list + desc_slot * 8 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), - RADEON_USAGE_READWRITE | - RADEON_PRIO_SAMPLER_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), + RADEON_USAGE_READWRITE | + RADEON_PRIO_SAMPLER_BUFFER); if (shader == PIPE_SHADER_COMPUTE) sctx->compute_image_sgprs_dirty = true; @@ -1860,8 +1859,8 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) (*tex_handle)->desc_dirty = true; sctx->bindless_descriptors_dirty = true; - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | - RADEON_PRIO_SAMPLER_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READ | + RADEON_PRIO_SAMPLER_BUFFER); } } } @@ -1885,8 +1884,8 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) (*img_handle)->desc_dirty = true; sctx->bindless_descriptors_dirty = true; - radeon_add_to_gfx_buffer_list_check_mem( - sctx, si_resource(buffer), RADEON_USAGE_READWRITE | RADEON_PRIO_SAMPLER_BUFFER, true); + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), + RADEON_USAGE_READWRITE | RADEON_PRIO_SAMPLER_BUFFER); } } } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 8bbe4ca5aa7..a25f64b40b2 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -550,10 +550,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) ctx->num_buffered_gfx_sh_regs = 0; ctx->num_buffered_compute_sh_regs = 0; - if (ctx->scratch_buffer) { - si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b); + if (ctx->scratch_buffer) si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state); - } if (ctx->streamout.suspended) { ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 740a2e40b05..0402c92d3d7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1311,8 +1311,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); - sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3; - ac_get_hs_info(&sscreen->info, &sscreen->hs); sscreen->has_draw_indirect_multi = diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 27865dd6ec9..992a1cef7f3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -311,8 +311,6 @@ struct si_resource { /* Winsys objects. */ struct pb_buffer *buf; uint64_t gpu_address; - /* Memory usage if the buffer placement is optimal. */ - uint32_t memory_usage_kb; /* Resource properties. */ uint64_t bo_size; @@ -547,7 +545,6 @@ struct si_screen { unsigned width, unsigned height, unsigned depth, bool get_bo_metadata, uint32_t *state, uint32_t *fmask_state); - unsigned max_memory_usage_kb; unsigned pa_sc_raster_config; unsigned pa_sc_raster_config_1; unsigned se_tile_repeat; @@ -1030,8 +1027,6 @@ struct si_context { unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ - /* Current unaccounted memory usage. */ - uint32_t memory_usage_kb; /* Atoms (direct states). */ union si_state_atoms atoms; @@ -1760,14 +1755,6 @@ static inline unsigned si_get_minimum_num_gfx_cs_dwords(struct si_context *sctx, return 2048 + sctx->num_cs_dw_queries_suspend + num_draws * 10; } -static inline void si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r) -{ - if (r) { - /* Add memory usage for need_gfx_cs_space */ - sctx->memory_usage_kb += si_resource(r)->memory_usage_kb; - } -} - static inline unsigned si_get_atom_bit(struct si_context *sctx, struct si_atom *atom) { return 1 << (atom - sctx->atoms.array); @@ -1982,35 +1969,12 @@ static inline bool util_rast_prim_is_lines_or_triangles(unsigned prim) return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES | UTIL_ALL_PRIM_TRIANGLE_MODES)) != 0; } -/** - * Return true if there is enough memory in VRAM and GTT for the buffers - * added so far. - * - * \param vram VRAM memory size not added to the buffer list yet - * \param gtt GTT memory size not added to the buffer list yet - */ -static inline bool radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_cmdbuf *cs, - uint32_t kb) -{ - return kb + cs->used_vram_kb + cs->used_gart_kb < screen->max_memory_usage_kb; -} - static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) { struct radeon_cmdbuf *cs = &ctx->gfx_cs; - /* There are two memory usage counters in the winsys for all buffers - * that have been added (cs_add_buffer) and one counter in the pipe - * driver for those that haven't been added yet. - */ - uint32_t kb = ctx->memory_usage_kb; - ctx->memory_usage_kb = 0; - - if (radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, kb) && - ctx->ws->cs_check_space(cs, si_get_minimum_num_gfx_cs_dwords(ctx, num_draws))) - return; - - si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); + if (!ctx->ws->cs_check_space(cs, si_get_minimum_num_gfx_cs_dwords(ctx, num_draws))) + si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } /** @@ -2031,33 +1995,6 @@ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct rad bo->domains); } -/** - * Same as above, but also checks memory usage and flushes the context - * accordingly. - * - * When this SHOULD NOT be used: - * - * - if si_context_add_resource_size has been called for the buffer - * followed by *_need_cs_space for checking the memory usage - * - * - when emitting state packets and draw packets (because preceding packets - * can't be re-emitted at that point) - * - * - if shader resource "enabled_mask" is not up-to-date or there is - * a different constraint disallowing a context flush - */ -static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, - struct si_resource *bo, - unsigned usage, - bool check_mem) -{ - if (check_mem && - !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->memory_usage_kb + bo->memory_usage_kb)) - si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage); -} - static inline void si_select_draw_vbo(struct si_context *sctx) { pipe_draw_vbo_func draw_vbo = sctx->draw_vbo[!!sctx->shader.tes.cso] diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c71e67b8485..89f6c18b7af 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2993,8 +2993,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.has_dcc_msaa = true; } - si_context_add_resource_size(sctx, surf->base.texture); - p_atomic_inc(&tex->framebuffers_bound); /* Update the minimum but don't keep 0. */ @@ -3016,8 +3014,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS)) sctx->framebuffer.DB_has_shader_readable_metadata = true; - si_context_add_resource_size(sctx, surf->base.texture); - /* Update the minimum but don't keep 0. */ if (!sctx->framebuffer.min_bytes_per_pixel || zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 7ab7402413a..4a4e97c6796 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2085,10 +2085,6 @@ static void si_draw(struct pipe_context *ctx, unsigned total_direct_count = 0; if (!IS_DRAW_VERTEX_STATE && indirect) { - /* Add the buffer size for memory checking in need_cs_space. */ - if (indirect->buffer) - si_context_add_resource_size(sctx, indirect->buffer); - /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (GFX_VERSION <= GFX8) { if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) { @@ -2207,8 +2203,7 @@ static void si_draw(struct pipe_context *ctx, } } - /* Since we've called si_context_add_resource_size for vertex buffers, - * this must be called after si_need_cs_space, because we must let + /* This must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. * * This must be done after si_update_shaders because si_update_shaders can diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index ec866ffca88..cfb1bfde013 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4044,8 +4044,6 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes) sctx->screen->info.pte_fragment_size); if (!sctx->scratch_buffer) return false; - - si_context_add_resource_size(sctx, &sctx->scratch_buffer->b.b); } if (sctx->gfx_level < GFX11 && !si_update_scratch_relocs(sctx)) diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index fefabf875d7..b96789cb356 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -135,7 +135,6 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ if (!targets[i]) continue; - si_context_add_resource_size(sctx, targets[i]->buffer); enabled_mask |= 1 << i; if (offsets[i] == ((unsigned)-1)) diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index f3c5c307192..7097594edbd 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -504,7 +504,6 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex tex->buffer.b.b.bind = templ.bind; radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf); tex->buffer.gpu_address = new_tex->buffer.gpu_address; - tex->buffer.memory_usage_kb = new_tex->buffer.memory_usage_kb; tex->buffer.bo_size = new_tex->buffer.bo_size; tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2; tex->buffer.domains = new_tex->buffer.domains; @@ -988,7 +987,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2; resource->flags = plane0->buffer.flags; resource->domains = plane0->buffer.domains; - resource->memory_usage_kb = plane0->buffer.memory_usage_kb; radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf); resource->gpu_address = plane0->buffer.gpu_address; @@ -1009,7 +1007,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_size = imported_buf->size; resource->bo_alignment_log2 = imported_buf->alignment_log2; resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf); - resource->memory_usage_kb = MAX2(1, resource->bo_size / 1024); if (sscreen->ws->buffer_get_flags) resource->flags = sscreen->ws->buffer_get_flags(resource->buf); }