mediafoundation: Refactor frame, multi slice and combine slice IMFSample emission to make it simpler

Reviewed-by: Pohsiang (John) Hsu <pohhsu@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37982>
This commit is contained in:
Silvio Vilerino
2025-10-16 16:47:50 -04:00
committed by Pohsiang (John) Hsu
parent f4f619e26e
commit fd546c1cde
3 changed files with 227 additions and 220 deletions
@@ -74,6 +74,13 @@ typedef class DX12EncodeContext
struct pipe_h265_enc_picture_desc h265enc;
struct pipe_av1_enc_picture_desc av1enc;
} encoderPicInfo = {};
bool IsSliceAutoModeEnabled()
{
return ((m_Codec == D3D12_VIDEO_ENCODER_CODEC_H264) && (encoderPicInfo.h264enc.slice_mode == PIPE_VIDEO_SLICE_MODE_AUTO)) ||
((m_Codec == D3D12_VIDEO_ENCODER_CODEC_HEVC) && (encoderPicInfo.h265enc.slice_mode == PIPE_VIDEO_SLICE_MODE_AUTO));
}
const D3D12_VIDEO_ENCODER_CODEC m_Codec = D3D12_VIDEO_ENCODER_CODEC_H264;
UINT32 GetPictureType()
{
@@ -620,6 +620,19 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
ComPtr<ID3D12Fence> &pResolveStatsCompletionFence,
UINT64 ResolveStatsCompletionFenceValue,
ID3D12CommandQueue *pSyncObjectQueue );
void ProcessSliceBitstreamData( LPDX12EncodeContext pDX12EncodeContext,
uint32_t slice_idx,
LPBYTE lpBuffer,
std::vector<struct codec_unit_location_t> &mfsample_codec_unit_metadata,
uint64_t &output_buffer_offset );
void FinalizeAndEmitOutputSample( LPDX12EncodeContext pDX12EncodeContext,
ComPtr<IMFMediaBuffer> &spMemoryBuffer,
ComPtr<IMFSample> &spOutputSample,
struct codec_unit_location_t *pCodecUnitMetadata,
unsigned CodecUnitMetadataCount,
DWORD dwReceivedInput,
BOOL bIsLastSlice,
uint64_t ResolveStatsCompletionFenceValue );
HRESULT UpdateAvailableInputType();
HRESULT InternalCheckInputType( IMFMediaType *pType );
HRESULT InternalCheckOutputType( IMFMediaType *pType );
@@ -1259,6 +1259,128 @@ done:
return hr;
}
void
CDX12EncHMFT::ProcessSliceBitstreamData( LPDX12EncodeContext pDX12EncodeContext,
uint32_t slice_idx,
LPBYTE lpBuffer,
std::vector<struct codec_unit_location_t> &mfsample_codec_unit_metadata,
uint64_t &output_buffer_offset )
{
HMFT_ETW_EVENT_START( "GPUIndividualSliceStatsRead", this );
unsigned codec_unit_metadata_count = 0u;
m_pPipeVideoCodec->get_slice_bitstream_data( m_pPipeVideoCodec,
pDX12EncodeContext->pAsyncCookie,
slice_idx,
NULL /*get size*/,
&codec_unit_metadata_count );
assert( codec_unit_metadata_count > 0 );
std::vector<struct codec_unit_location_t> codec_unit_metadata;
codec_unit_metadata.resize( codec_unit_metadata_count, {} );
m_pPipeVideoCodec->get_slice_bitstream_data( m_pPipeVideoCodec,
pDX12EncodeContext->pAsyncCookie,
slice_idx,
codec_unit_metadata.data(),
&codec_unit_metadata_count );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceStatsRead", this );
//
// Copy all the NALs produced in this slice
//
HMFT_ETW_EVENT_START( "GPUIndividualSliceBitstreamRead", this );
struct pipe_box box = { 0 };
box.width = 0;
for( auto &nal : codec_unit_metadata )
box.width += static_cast<int32_t>( nal.size );
box.height = pDX12EncodeContext->pOutputBitRes[slice_idx]->height0;
box.depth = pDX12EncodeContext->pOutputBitRes[slice_idx]->depth0;
struct pipe_transfer *transfer_data = NULL;
HMFT_ETW_EVENT_START( "GPUIndividualSliceBufferMap", this );
uint8_t *pMappedBuffer =
(uint8_t *) m_pPipeContext->buffer_map( m_pPipeContext,
pDX12EncodeContext->pOutputBitRes[slice_idx],
0,
PIPE_MAP_READ,
&box,
&transfer_data );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBufferMap", this );
assert( pMappedBuffer );
if( pMappedBuffer )
{
for( auto &nal : codec_unit_metadata )
{
// Add NAL with adjusted offset for accumulated buffer position
struct codec_unit_location_t accumulated_nal = nal;
accumulated_nal.offset = output_buffer_offset;
mfsample_codec_unit_metadata.push_back( accumulated_nal );
memcpy( lpBuffer + static_cast<size_t>( output_buffer_offset ),
pMappedBuffer + static_cast<size_t>( nal.offset ),
static_cast<size_t>( nal.size ) );
output_buffer_offset += nal.size;
}
}
HMFT_ETW_EVENT_START( "GPUIndividualSliceBufferUnmap", this );
pipe_buffer_unmap( m_pPipeContext, transfer_data );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBufferUnmap", this );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBitstreamRead", this );
}
void
CDX12EncHMFT::FinalizeAndEmitOutputSample( LPDX12EncodeContext pDX12EncodeContext,
ComPtr<IMFMediaBuffer> &spMemoryBuffer,
ComPtr<IMFSample> &spOutputSample,
struct codec_unit_location_t *pCodecUnitMetadata,
unsigned CodecUnitMetadataCount,
DWORD dwReceivedInput,
BOOL bIsLastSlice,
uint64_t ResolveStatsCompletionFenceValue )
{
spOutputSample->AddBuffer( spMemoryBuffer.Get() );
if( FAILED( ConfigureBitstreamOutputSampleAttributes( spOutputSample.Get(),
pDX12EncodeContext,
dwReceivedInput,
bIsLastSlice,
pCodecUnitMetadata,
CodecUnitMetadataCount ) ) )
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureBitstreamOutputSampleAttributes failed", this );
}
// Attach the async stats DXGIBuffers to the MFSample output gated by pAsyncFence completion
{
// Set stats metadata buffers to the sample here. As we are returning the dxgi buffers gated by the completion fence
// for the resolved stats we do not need to wait for the pAsyncFence completion on the CPU.
if( FAILED( ConfigureAsyncStatsMetadataOutputSampleAttributes( spOutputSample.Get(),
pDX12EncodeContext->pPipeResourcePSNRStats,
pDX12EncodeContext->pPipeResourceQPMapStats,
pDX12EncodeContext->pPipeResourceRCBitAllocMapStats,
pDX12EncodeContext->pPipeResourceSATDMapStats,
pDX12EncodeContext->spAsyncFence,
ResolveStatsCompletionFenceValue,
pDX12EncodeContext->pSyncObjectQueue ) ) )
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureAsyncStatsMetadataOutputSampleAttributes failed", this );
}
}
// Issue a new METransformHaveOutput event for the async slices mode
// with the combined MFSample with all slices
// This is done before pAsyncFence is waited on below
// as we already have all the slice info and the async stats
// are attached gated by the pAsyncFence completion
{
std::lock_guard<std::mutex> lock( m_OutputQueueLock );
HMFT_ETW_EVENT_INFO( "METransformHaveOutput", this );
if( SUCCEEDED( QueueEvent( METransformHaveOutput, GUID_NULL, S_OK, nullptr ) ) )
{
m_OutputQueue.push( spOutputSample.Detach() );
m_dwHaveOutputCount++;
}
}
}
// internal thread function to handle encoding and output
void WINAPI
CDX12EncHMFT::xThreadProc( void *pCtx )
@@ -1304,25 +1426,12 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
HMFT_ETW_EVENT_START( "TimeToEmitMFSampleOutput", pThis );
pipe_enc_feedback_metadata metadata = {};
unsigned int encoded_bitstream_bytes = 0u;
std::vector<ComPtr<IMFSample>> spOutputSamples;
std::vector<ComPtr<IMFMediaBuffer>> spMemoryBuffers;
unsigned int num_output_samples_emitted = 1u; // Assume D3D12_VIDEO_ENCODER_COMPRESSED_BITSTREAM_NOTIFICATION_MODE_FULL_FRAME
if (pDX12EncodeContext->sliceNotificationMode == D3D12_VIDEO_ENCODER_COMPRESSED_BITSTREAM_NOTIFICATION_MODE_SUBREGIONS)
{
num_output_samples_emitted = (pThis->m_bSliceGenerationModeSet && (pThis->m_uiSliceGenerationMode == 1)) ?
static_cast<uint32_t>( pDX12EncodeContext->pSliceFences.size() ) :
1u;
}
spOutputSamples.resize( num_output_samples_emitted );
spMemoryBuffers.resize( num_output_samples_emitted );
HMFT_ETW_EVENT_START( "CreateOutputSamples", pThis );
for ( unsigned int sample_idx = 0; sample_idx < num_output_samples_emitted; sample_idx++ )
{
MFCreateSample( &spOutputSamples[sample_idx] );
MFCreateMemoryBuffer( pThis->m_uiMaxOutputBitstreamSize, &spMemoryBuffers[sample_idx] );
}
HMFT_ETW_EVENT_STOP( "CreateOutputSamples", pThis );
uint64_t ResolveStatsCompletionFenceValue = 0;
HANDLE fence_handle = (HANDLE) pThis->m_pPipeContext->screen->fence_get_win32_handle( pThis->m_pPipeContext->screen,
pDX12EncodeContext->pAsyncFence,
&ResolveStatsCompletionFenceValue );
if( fence_handle )
CloseHandle( fence_handle );
{
std::lock_guard<std::mutex> lock( pThis->m_encoderLock );
@@ -1334,178 +1443,93 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
// Otherwise, let's copy all the sliced together here after full frame completion (see below)
if ( !pThis->m_bFlushing && ( pDX12EncodeContext->sliceNotificationMode == D3D12_VIDEO_ENCODER_COMPRESSED_BITSTREAM_NOTIFICATION_MODE_SUBREGIONS ))
{
// Obtain fence value from pipe_fence_handle
uint64_t ResolveStatsCompletionFenceValue = 0;
HANDLE fence_handle = (HANDLE) pThis->m_pPipeContext->screen->fence_get_win32_handle( pThis->m_pPipeContext->screen,
pDX12EncodeContext->pAsyncFence,
&ResolveStatsCompletionFenceValue );
if( fence_handle )
CloseHandle( fence_handle );
//
// Wait for each slice fence and resolve offset/size as each slice is ready
//
uint64_t output_buffer_offset = 0u;
uint32_t num_slice_buffers = static_cast<uint32_t>( pDX12EncodeContext->pSliceFences.size() );
std::vector<struct codec_unit_location_t> codec_unit_metadata;
uint64_t output_buffer_offset = 0u;
std::vector<struct codec_unit_location_t> mfsample_codec_unit_metadata;
const size_t max_default_init_alloc_count_nals = 64u;
codec_unit_metadata.reserve( max_default_init_alloc_count_nals );
mfsample_codec_unit_metadata.reserve( MAX_NALU_LENGTH_INFO_ENTRIES );
auto WaitForFence = [&]( pipe_fence_handle *pFence, uint64_t timeout ) -> bool {
assert( pFence );
HMFT_ETW_EVENT_START( "GPUIndividualSliceCompletionWait", pThis );
bool result = pThis->m_pPipeVideoCodec->fence_wait( pThis->m_pPipeVideoCodec, pFence, timeout ) != 0;
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceCompletionWait", pThis );
assert( result );
if( !result )
{
MFE_ERROR( "[dx12 hmft 0x%p] Fence wait failed", pThis );
}
return result;
};
LPBYTE lpBuffer = NULL;
for( uint32_t slice_idx = 0; slice_idx < num_slice_buffers; slice_idx++ )
// If slice generation mode is explicitly set to 1 (1 slice per output sample) and auto mode is off
// emit multiple output samples, one per slice
if (pThis->m_bSliceGenerationModeSet &&
(pThis->m_uiSliceGenerationMode == 1) &&
(!pDX12EncodeContext->IsSliceAutoModeEnabled())) // We cannot know if the last slice is actually the last one on time to set the last MFSample properties
{
auto cur_output_sample_emitted_idx = ( num_output_samples_emitted == 1 ) ? 0 : slice_idx;
// Reset offset and clear accumulated NALs for per-slice mode (each slice goes to separate buffer)
if( num_output_samples_emitted > 1 )
for( uint32_t slice_idx = 0; slice_idx < num_slice_buffers; slice_idx++ )
{
output_buffer_offset = 0u;
mfsample_codec_unit_metadata.clear();
}
if ((num_output_samples_emitted > 1) || // If multiple output samples, we do this for every slice
// Or if single output sample, we do this only for the first slice
(num_output_samples_emitted == 1) && (slice_idx == 0))
{
spMemoryBuffers[cur_output_sample_emitted_idx]->Lock( &lpBuffer, NULL, NULL );
}
assert( pDX12EncodeContext->pSliceFences[slice_idx] );
HMFT_ETW_EVENT_START( "GPUIndividualSliceCompletionWait", pThis );
bool fenceWaitResult = pThis->m_pPipeVideoCodec->fence_wait( pThis->m_pPipeVideoCodec,
pDX12EncodeContext->pSliceFences[slice_idx],
OS_TIMEOUT_INFINITE ) != 0;
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceCompletionWait", pThis );
assert( fenceWaitResult );
if( fenceWaitResult )
{
HMFT_ETW_EVENT_START( "GPUIndividualSliceStatsRead", pThis );
unsigned codec_unit_metadata_count = 0u;
pThis->m_pPipeVideoCodec->get_slice_bitstream_data( pThis->m_pPipeVideoCodec,
pDX12EncodeContext->pAsyncCookie,
slice_idx,
NULL /*get size*/,
&codec_unit_metadata_count );
assert( codec_unit_metadata_count > 0 );
codec_unit_metadata.clear();
codec_unit_metadata.resize( codec_unit_metadata_count, {} );
pThis->m_pPipeVideoCodec->get_slice_bitstream_data( pThis->m_pPipeVideoCodec,
pDX12EncodeContext->pAsyncCookie,
slice_idx,
codec_unit_metadata.data(),
&codec_unit_metadata_count );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceStatsRead", pThis );
//
// Copy all the NALs produced in this slice and add a new buffer to the MFSample
//
HMFT_ETW_EVENT_START( "GPUIndividualSliceBitstreamRead", pThis );
struct pipe_box box = { 0 };
box.width = 0;
for( auto &nal : codec_unit_metadata )
box.width += static_cast<int32_t>( nal.size );
box.height = pDX12EncodeContext->pOutputBitRes[slice_idx]->height0;
box.depth = pDX12EncodeContext->pOutputBitRes[slice_idx]->depth0;
struct pipe_transfer *transfer_data = NULL;
HMFT_ETW_EVENT_START( "GPUIndividualSliceBufferMap", pThis );
uint8_t *pMappedBuffer =
(uint8_t *) pThis->m_pPipeContext->buffer_map( pThis->m_pPipeContext,
pDX12EncodeContext->pOutputBitRes[slice_idx],
0,
PIPE_MAP_READ,
&box,
&transfer_data );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBufferMap", pThis );
assert( pMappedBuffer );
if( pMappedBuffer )
if( WaitForFence( pDX12EncodeContext->pSliceFences[slice_idx], OS_TIMEOUT_INFINITE ) )
{
for( auto &nal : codec_unit_metadata )
{
// Add NAL with adjusted offset for accumulated buffer position
struct codec_unit_location_t accumulated_nal = nal;
accumulated_nal.offset = output_buffer_offset;
mfsample_codec_unit_metadata.push_back( accumulated_nal );
memcpy( lpBuffer + static_cast<size_t>( output_buffer_offset ),
pMappedBuffer + static_cast<size_t>( nal.offset ),
static_cast<size_t>( nal.size ) );
output_buffer_offset += nal.size;
}
HMFT_ETW_EVENT_START( "GPUIndividualSliceBufferUnmap", pThis );
pipe_buffer_unmap( pThis->m_pPipeContext, transfer_data );
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBufferUnmap", pThis );
}
HMFT_ETW_EVENT_STOP( "GPUIndividualSliceBitstreamRead", pThis );
}
else
{
MFE_ERROR( "[dx12 hmft 0x%p] Slice fence wait failed", pThis );
}
if ((num_output_samples_emitted > 1) || // If multiple output samples, we do this for every slice
// Or if single output sample, we do this only for the last slice
(num_output_samples_emitted == 1) && (slice_idx == (num_slice_buffers - 1)))
{
spMemoryBuffers[cur_output_sample_emitted_idx]->Unlock();
spMemoryBuffers[cur_output_sample_emitted_idx]->SetCurrentLength( static_cast<DWORD>( output_buffer_offset ) );
spOutputSamples[cur_output_sample_emitted_idx]->AddBuffer( spMemoryBuffers[cur_output_sample_emitted_idx].Get() );
HRESULT hr = pThis->ConfigureBitstreamOutputSampleAttributes( spOutputSamples[cur_output_sample_emitted_idx].Get(),
pDX12EncodeContext,
dwReceivedInput,
(slice_idx == (num_slice_buffers - 1)) /* bIsLastSlice */,
mfsample_codec_unit_metadata.data(),
static_cast<unsigned>( mfsample_codec_unit_metadata.size() ) );
if( FAILED( hr ) )
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureBitstreamOutputSampleAttributes failed - hr=0x%08x", pThis, hr );
}
// Attach the async stats DXGIBuffers to the MFSample output gated by pAsyncFence completion
{
// Set stats metadata buffers to the sample here. As we are returning the dxgi buffers gated by the completion fence
// for the resolved stats we do not need to wait for the pAsyncFence completion on the CPU.
if( FAILED( pThis->ConfigureAsyncStatsMetadataOutputSampleAttributes(spOutputSamples[cur_output_sample_emitted_idx].Get(),
pDX12EncodeContext->pPipeResourcePSNRStats,
pDX12EncodeContext->pPipeResourceQPMapStats,
pDX12EncodeContext->pPipeResourceRCBitAllocMapStats,
pDX12EncodeContext->pPipeResourceSATDMapStats,
pDX12EncodeContext->spAsyncFence,
ResolveStatsCompletionFenceValue,
pDX12EncodeContext->pSyncObjectQueue )))
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureAsyncStatsMetadataOutputSampleAttributes failed", pThis );
}
}
// Issue a new METransformHaveOutput event for the async slices mode
// with the combined MFSample with all slices
// This is done before pAsyncFence is waited on below
// as we already have all the slice info and the async stats
// are attached gated by the pAsyncFence completion
{
std::lock_guard<std::mutex> lock( pThis->m_OutputQueueLock );
HMFT_ETW_EVENT_INFO( "METransformHaveOutput", pThis );
if( SUCCEEDED( pThis->QueueEvent( METransformHaveOutput, GUID_NULL, S_OK, nullptr ) ) )
{
pThis->m_OutputQueue.push( spOutputSamples[cur_output_sample_emitted_idx].Detach() );
pThis->m_dwHaveOutputCount++;
}
ComPtr<IMFSample> spOutputSample;
ComPtr<IMFMediaBuffer> spMemoryBuffer;
MFCreateSample( &spOutputSample );
MFCreateMemoryBuffer( pThis->m_uiMaxOutputBitstreamSize, &spMemoryBuffer );
spMemoryBuffer->Lock( &lpBuffer, NULL, NULL );
pThis->ProcessSliceBitstreamData( pDX12EncodeContext, slice_idx, lpBuffer,
mfsample_codec_unit_metadata, output_buffer_offset );
spMemoryBuffer->Unlock();
spMemoryBuffer->SetCurrentLength( static_cast<DWORD>( output_buffer_offset ) );
pThis->FinalizeAndEmitOutputSample( pDX12EncodeContext, spMemoryBuffer,
spOutputSample, mfsample_codec_unit_metadata.data(),
static_cast<unsigned>( mfsample_codec_unit_metadata.size() ),
dwReceivedInput, (slice_idx == (num_slice_buffers - 1)), ResolveStatsCompletionFenceValue );
HMFT_ETW_EVENT_STOP( "TimeToEmitMFSampleOutput", pThis );
}
}
}
else
{
ComPtr<IMFSample> spOutputSample;
ComPtr<IMFMediaBuffer> spMemoryBuffer;
MFCreateSample( &spOutputSample );
MFCreateMemoryBuffer( pThis->m_uiMaxOutputBitstreamSize, &spMemoryBuffer );
spMemoryBuffer->Lock( &lpBuffer, NULL, NULL );
for( uint32_t slice_idx = 0; slice_idx < num_slice_buffers; slice_idx++ )
{
if( WaitForFence( pDX12EncodeContext->pSliceFences[slice_idx], OS_TIMEOUT_INFINITE ) )
{
pThis->ProcessSliceBitstreamData( pDX12EncodeContext, slice_idx, lpBuffer,
mfsample_codec_unit_metadata, output_buffer_offset );
}
}
spMemoryBuffer->Unlock();
spMemoryBuffer->SetCurrentLength( static_cast<DWORD>( output_buffer_offset ) );
pThis->FinalizeAndEmitOutputSample( pDX12EncodeContext, spMemoryBuffer, spOutputSample,
mfsample_codec_unit_metadata.data(),
static_cast<unsigned>( mfsample_codec_unit_metadata.size() ),
dwReceivedInput, TRUE,
ResolveStatsCompletionFenceValue );
HMFT_ETW_EVENT_STOP( "TimeToEmitMFSampleOutput", pThis );
}
// Cleanup fences
for (unsigned slice_idx = 0; slice_idx < pDX12EncodeContext->pSliceFences.size(); slice_idx++)
{
if (pDX12EncodeContext->pSliceFences[slice_idx])
{
pThis->m_pPipeVideoCodec->destroy_fence( pThis->m_pPipeVideoCodec, pDX12EncodeContext->pSliceFences[slice_idx] );
}
}
if (pDX12EncodeContext->pLastSliceFence)
{
@@ -1665,46 +1689,17 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
encoded_bitstream_bytes && ( pDX12EncodeContext->sliceNotificationMode ==
D3D12_VIDEO_ENCODER_COMPRESSED_BITSTREAM_NOTIFICATION_MODE_FULL_FRAME ))
{
HRESULT hr = pThis->ConfigureBitstreamOutputSampleAttributes( spOutputSamples[0].Get(),
pDX12EncodeContext,
dwReceivedInput,
TRUE /* bIsLastSlice */,
&metadata.codec_unit_metadata[0],
metadata.codec_unit_metadata_count );
if( FAILED( hr ) )
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureBitstreamOutputSampleAttributes failed - hr=0x%08x", pThis, hr );
}
// Attach the async stats DXGIBuffers to the MFSample output gated by pAsyncFence completion
{
// Obtain fence value from pipe_fence_handle
uint64_t ResolveStatsCompletionFenceValue = 0;
HANDLE fence_handle = (HANDLE) pThis->m_pPipeContext->screen->fence_get_win32_handle( pThis->m_pPipeContext->screen,
pDX12EncodeContext->pAsyncFence,
&ResolveStatsCompletionFenceValue );
if( fence_handle )
CloseHandle( fence_handle );
if( FAILED( pThis->ConfigureAsyncStatsMetadataOutputSampleAttributes(spOutputSamples[0].Get(),
pDX12EncodeContext->pPipeResourcePSNRStats,
pDX12EncodeContext->pPipeResourceQPMapStats,
pDX12EncodeContext->pPipeResourceRCBitAllocMapStats,
pDX12EncodeContext->pPipeResourceSATDMapStats,
pDX12EncodeContext->spAsyncFence,
ResolveStatsCompletionFenceValue,
pDX12EncodeContext->pSyncObjectQueue )))
{
MFE_ERROR( "[dx12 hmft 0x%p] ConfigureAsyncStatsMetadataOutputSampleAttributes failed", pThis );
}
}
ComPtr<IMFSample> spOutputSample;
ComPtr<IMFMediaBuffer> spMemoryBuffer;
MFCreateSample( &spOutputSample );
MFCreateMemoryBuffer( pThis->m_uiMaxOutputBitstreamSize, &spMemoryBuffer );
if( metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_MAX_FRAME_SIZE_OVERFLOW )
debug_printf( "[dx12 hmft 0x%p] PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_MAX_FRAME_SIZE_OVERFLOW set\n", pThis );
// Set encoding quality metrics (only available after get_feedback on full frame encode)
debug_printf( "[dx12 hmft 0x%p] Frame AverageQP: %d\n", pThis, metadata.average_frame_qp );
if( FAILED( spOutputSamples[0]->SetUINT64( MFSampleExtension_VideoEncodeQP, (UINT64) metadata.average_frame_qp ) ) )
if( FAILED( spOutputSample->SetUINT64( MFSampleExtension_VideoEncodeQP, (UINT64) metadata.average_frame_qp ) ) )
{
debug_printf( "[dx12 hmft 0x%p] WARNING: could not set MFSampleExtension_VideoEncodeQP\n", pThis );
}
@@ -1728,7 +1723,7 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
if( pMappedBuffer )
{
LPBYTE lpBuffer;
spMemoryBuffers[0]->Lock( &lpBuffer, NULL, NULL );
spMemoryBuffer->Lock( &lpBuffer, NULL, NULL );
size_t copied_bytes = 0;
for( unsigned i = 0; i < metadata.codec_unit_metadata_count; i++ )
{
@@ -1737,28 +1732,20 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
static_cast<size_t>( metadata.codec_unit_metadata[i].size ) );
copied_bytes += static_cast<size_t>( metadata.codec_unit_metadata[i].size );
}
spMemoryBuffers[0]->Unlock();
spMemoryBuffers[0]->SetCurrentLength( static_cast<DWORD>( copied_bytes ) );
spMemoryBuffer->Unlock();
spMemoryBuffer->SetCurrentLength( static_cast<DWORD>( copied_bytes ) );
HMFT_ETW_EVENT_START( "GPUFrameEncodeGPUBufferUnmap", pThis );
pipe_buffer_unmap( pThis->m_pPipeContext, transfer_data );
HMFT_ETW_EVENT_STOP( "GPUFrameEncodeGPUBufferUnmap", pThis );
spOutputSamples[0]->AddBuffer( spMemoryBuffers[0].Get() );
}
HMFT_ETW_EVENT_STOP( "GPUFrameEncodeBitstreamRead", pThis );
// Issue a new METransformHaveOutput event for the full frame
// as we only output one MFSample per frame
// This is done after pAsyncFence was waited on above
// and get_feedback was called to get the post resolve metadata
{
std::lock_guard<std::mutex> lock( pThis->m_OutputQueueLock );
HMFT_ETW_EVENT_INFO( "METransformHaveOutput", pThis );
if( SUCCEEDED( pThis->QueueEvent( METransformHaveOutput, GUID_NULL, S_OK, nullptr ) ) )
{
pThis->m_OutputQueue.push( spOutputSamples[0].Detach() );
pThis->m_dwHaveOutputCount++;
}
}
// Use FinalizeAndEmitOutputSample to configure attributes and emit output
pThis->FinalizeAndEmitOutputSample( pDX12EncodeContext, spMemoryBuffer, spOutputSample,
&metadata.codec_unit_metadata[0],
metadata.codec_unit_metadata_count,
dwReceivedInput, TRUE,
ResolveStatsCompletionFenceValue );
HMFT_ETW_EVENT_STOP( "TimeToEmitMFSampleOutput", pThis );
}