Ryan,
The code is below. When run, it deadlocks after a few frames or fails with the following error:
D3D11 CORRUPTION: ID3D11DeviceContext::DecoderExtension: Two threads were found to be executing functions associated with the same Device[Context] at the same time. This will cause corruption of memory. Appropriate thread synchronization needs to occur external to the Direct3D API (or through the ID3D10Multithread interface). 21196 and 27928 are the implicated thread ids. [ MISCELLANEOUS CORRUPTION #28: CORRUPTED_MULTITHREADING]
While the render loop does not access the buffers created for encoding, it does use the same device context. Thread synchronization could address this, but at that point isn't the encoding going to be essentially synchronous?
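The debug layer's own suggestion might be the cheapest thing to try first: ID3D10Multithread can be queried off the D3D11 immediate context and makes the driver take its own lock around every context call. A rough, untested sketch (using the same names as the code below):

#include <d3d10.h> // for ID3D10Multithread

CComPtr<ID3D10Multithread> multithread;
HRESULT hr = context->d3d11DeviceContext->QueryInterface(
	__uuidof(ID3D10Multithread), (void**)&multithread);
if (SUCCEEDED(hr))
	multithread->SetMultithreadProtected(TRUE); // driver now serializes all context calls

That should at least stop the corruption, though it presumably pays the same serialization cost as locking ourselves. Here is the full class: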
#pragma once

#include <windows.h>
#include <d3d11.h>
#include <atlbase.h> // CComPtr
#include <cstdio>
#include <memory>
#include <queue>

#include "nvEncodeAPI.h"

namespace RoomAlive
{
	typedef NVENCSTATUS(__stdcall *MYPROC)(NV_ENCODE_API_FUNCTION_LIST *);

#define SET_VER(configStruct, type) { configStruct.version = type##_VER; }

	struct MyBuffer
	{
		void* hInputBuffer;
		void* registeredResource;
		void* hBitstreamBuffer;
		HANDLE hOutputEvent;
		NV_ENC_BUFFER_FORMAT bufferFormat;
		LARGE_INTEGER startingTime;
		NV_ENC_INPUT_PTR mappedResource;
	};
	class NvEncodeAsynchronous
	{
	public:
		NvEncodeAsynchronous(std::shared_ptr<Context> context, CComPtr<ID3D11Texture2D> inputTexture) : context(context), inputTexture(inputTexture)
		{
			NVENCSTATUS nvStatus;

			// get the nvEncode interface
			HINSTANCE instance = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
			MYPROC nvEncodeAPICreateInstance = (MYPROC)GetProcAddress(instance, "NvEncodeAPICreateInstance");
			encodeAPI = new NV_ENCODE_API_FUNCTION_LIST;
			memset(encodeAPI, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
			encodeAPI->version = NV_ENCODE_API_FUNCTION_LIST_VER;
			nvStatus = nvEncodeAPICreateInstance(encodeAPI);

			// open an encode session
			NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams;
			memset(&encodeSessionExParams, 0, sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS));
			SET_VER(encodeSessionExParams, NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS);
			encodeSessionExParams.device = (void*)(context->d3d11Device.p); // need AddRef?
			encodeSessionExParams.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
			encodeSessionExParams.apiVersion = NVENCAPI_VERSION;
			nvStatus = encodeAPI->nvEncOpenEncodeSessionEx(&encodeSessionExParams, &encoder);

			D3D11_TEXTURE2D_DESC desc;
			inputTexture->GetDesc(&desc);
			width = desc.Width;
			height = desc.Height;

			GUID encodeGUID = NV_ENC_CODEC_H264_GUID;
			GUID presetGUID = NV_ENC_PRESET_DEFAULT_GUID;

			// initialize hardware encoder session with reasonable defaults
			NV_ENC_INITIALIZE_PARAMS initializeParams;
			memset(&initializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
			SET_VER(initializeParams, NV_ENC_INITIALIZE_PARAMS);
			initializeParams.encodeGUID = encodeGUID;
			initializeParams.encodeWidth = width;
			initializeParams.encodeHeight = height;
			initializeParams.darWidth = width;
			initializeParams.darHeight = height;
			initializeParams.frameRateNum = 60;
			initializeParams.frameRateDen = 1;
			initializeParams.presetGUID = presetGUID;
			initializeParams.enableEncodeAsync = 1; // asynchronous
			initializeParams.enablePTD = 1; // let encoder decide on picture type (I, P, B)
			nvStatus = encodeAPI->nvEncInitializeEncoder(encoder, &initializeParams);

			// client should allocate at least 1 + no. B-frames buffers
			for (int i = 0; i < 16; i++)
			{
				HRESULT hr;
				ID3D11Texture2D* texture;
				{
					D3D11_TEXTURE2D_DESC d;
					ZeroMemory(&d, sizeof(d));
					d.Width = width;
					d.Height = height;
					d.MipLevels = 1;
					d.ArraySize = 1;
					d.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
					d.SampleDesc.Count = 1;
					d.SampleDesc.Quality = 0;
					d.Usage = D3D11_USAGE_DEFAULT;
					hr = context->d3d11Device->CreateTexture2D(&d, NULL, &texture);
				}

				// register the texture with the encoder
				NV_ENC_REGISTER_RESOURCE registerResource;
				memset(&registerResource, 0, sizeof(registerResource));
				SET_VER(registerResource, NV_ENC_REGISTER_RESOURCE);
				registerResource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
				registerResource.width = width;
				registerResource.height = height;
				registerResource.pitch = 0;
				registerResource.subResourceIndex = 0; // directx subresource
				registerResource.resourceToRegister = (void*)texture;
				registerResource.bufferFormat = NV_ENC_BUFFER_FORMAT_ARGB;
				nvStatus = encodeAPI->nvEncRegisterResource(encoder, &registerResource);

				// allocate output buffer
				NV_ENC_CREATE_BITSTREAM_BUFFER createBitStreamBuffer;
				memset(&createBitStreamBuffer, 0, sizeof(createBitStreamBuffer));
				SET_VER(createBitStreamBuffer, NV_ENC_CREATE_BITSTREAM_BUFFER);
				nvStatus = encodeAPI->nvEncCreateBitstreamBuffer(encoder, &createBitStreamBuffer);

				// create output event
				NV_ENC_EVENT_PARAMS nvEventParams = { 0 };
				SET_VER(nvEventParams, NV_ENC_EVENT_PARAMS);
				HANDLE hOutputEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
				nvEventParams.completionEvent = hOutputEvent;
				nvStatus = encodeAPI->nvEncRegisterAsyncEvent(encoder, &nvEventParams);

				// add to input buffer queue
				MyBuffer* buffer = new MyBuffer();
				buffer->hInputBuffer = texture;
				buffer->registeredResource = registerResource.registeredResource;
				buffer->bufferFormat = NV_ENC_BUFFER_FORMAT_ARGB;
				buffer->hBitstreamBuffer = createBitStreamBuffer.bitstreamBuffer;
				buffer->hOutputEvent = hOutputEvent; // completion event for asynchronous mode
				inputBuffers.push(buffer);
			}

			InitializeCriticalSectionAndSpinCount(&inputBuffersCriticalSection, 0);
			InitializeCriticalSectionAndSpinCount(&outputBuffersCriticalSection, 0);
			outputSemaphore = CreateSemaphore(NULL, 0, 16, NULL);

			fopen_s(&outputFile, "output.h264", "wb");
			// use ffmpeg to generate a proper mp4 file from the h264 stream, without re-encoding it:
			// ffmpeg -i "output.h264" -c:v copy -f mp4 "output.mp4" -y

			// create a thread to handle output
			CreateThread(NULL, 0, OutputThread, this, 0, NULL);
		}
		// thread entry point; just forwards to the member loop
		static DWORD WINAPI OutputThread(LPVOID lpParam)
		{
			((NvEncodeAsynchronous*)lpParam)->Loop();
			return 0;
		}

		// called in render loop
		void Next()
		{
			printf("Next\n");
			NVENCSTATUS nvStatus;

			// take a free input buffer (check under the lock to avoid racing the output thread)
			EnterCriticalSection(&inputBuffersCriticalSection);
			if (inputBuffers.empty())
			{
				printf("out of buffers----------------------------------------\n");
				LeaveCriticalSection(&inputBuffersCriticalSection);
				return; // drop this frame rather than pop an empty queue
			}
			MyBuffer* buffer = inputBuffers.front();
			inputBuffers.pop();
			LeaveCriticalSection(&inputBuffersCriticalSection);

			context->d3d11DeviceContext->CopyResource((ID3D11Texture2D*)buffer->hInputBuffer, inputTexture);
			printf("buffer copied\n");

			// map resource
			NV_ENC_MAP_INPUT_RESOURCE mapInputResource;
			memset(&mapInputResource, 0, sizeof(mapInputResource));
			SET_VER(mapInputResource, NV_ENC_MAP_INPUT_RESOURCE);
			mapInputResource.registeredResource = buffer->registeredResource;
			nvStatus = encodeAPI->nvEncMapInputResource(encoder, &mapInputResource);
			buffer->bufferFormat = mapInputResource.mappedBufferFmt;
			buffer->mappedResource = mapInputResource.mappedResource;

			// timestamp now, before the buffer becomes visible to the output thread
			QueryPerformanceCounter(&buffer->startingTime);

			// push output buffer
			EnterCriticalSection(&outputBuffersCriticalSection);
			outputBuffers.push(buffer);
			LeaveCriticalSection(&outputBuffersCriticalSection);

			// signal output thread that there is work to do
			ReleaseSemaphore(outputSemaphore, 1, NULL);
			printf("outputSemaphore released\n");

			// start encoding
			NV_ENC_PIC_PARAMS picParams;
			memset(&picParams, 0, sizeof(picParams));
			SET_VER(picParams, NV_ENC_PIC_PARAMS);
			picParams.inputWidth = width;
			picParams.inputHeight = height;
			picParams.inputPitch = width;
			picParams.inputBuffer = buffer->mappedResource;
			picParams.outputBitstream = buffer->hBitstreamBuffer;
			picParams.bufferFmt = buffer->bufferFormat;
			picParams.completionEvent = buffer->hOutputEvent;
			picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
			nvStatus = encodeAPI->nvEncEncodePicture(encoder, &picParams);
			printf("encodePicture %d\n", nvStatus);
		}
		void Loop()
		{
			NVENCSTATUS nvStatus;
			while (true)
			{
				// block until we have work to do
				WaitForSingleObject(outputSemaphore, INFINITE);
				printf("outputSemaphore\n");

				// get output buffer
				EnterCriticalSection(&outputBuffersCriticalSection);
				MyBuffer* buffer = outputBuffers.front();
				outputBuffers.pop();
				LeaveCriticalSection(&outputBuffersCriticalSection);

				// wait until encoding is done
				WaitForSingleObject(buffer->hOutputEvent, INFINITE);
				printf("outputEvent\n");

				// elapsed time since submitted for encoding
				LARGE_INTEGER endingTime, frequency, elapsedMicroseconds;
				QueryPerformanceCounter(&endingTime);
				QueryPerformanceFrequency(&frequency);
				elapsedMicroseconds.QuadPart = endingTime.QuadPart - buffer->startingTime.QuadPart;
				elapsedMicroseconds.QuadPart *= 1000000;
				elapsedMicroseconds.QuadPart /= frequency.QuadPart;

				// copy bitstream data
				NV_ENC_LOCK_BITSTREAM lockBitstream;
				memset(&lockBitstream, 0, sizeof(lockBitstream));
				SET_VER(lockBitstream, NV_ENC_LOCK_BITSTREAM);
				lockBitstream.doNotWait = 1;
				lockBitstream.outputBitstream = buffer->hBitstreamBuffer;
				//context->renderLock.lock();
				//printf("before LockBitstream\n");
				//nvStatus = encodeAPI->nvEncLockBitstream(encoder, &lockBitstream);
				//printf("LockBitStream %d\n", nvStatus);
				//printf("output frame %d %d bytes %lld microsec\n", nFrames++, lockBitstream.bitstreamSizeInBytes, elapsedMicroseconds.QuadPart);
				////fwrite(lockBitstream.bitstreamBufferPtr, 1, lockBitstream.bitstreamSizeInBytes, outputFile);
				//nvStatus = encodeAPI->nvEncUnlockBitstream(encoder, buffer->hBitstreamBuffer);
				//printf("UnlockBitStream %d\n", nvStatus);
				//context->renderLock.unlock();
				nvStatus = encodeAPI->nvEncUnmapInputResource(encoder, buffer->mappedResource);
				printf("UnmapInputResource %d\n", nvStatus);
				printf("copied bitstream\n");

				// return buffer
				EnterCriticalSection(&inputBuffersCriticalSection);
				inputBuffers.push(buffer);
				LeaveCriticalSection(&inputBuffersCriticalSection);
				printf("returned input buffer\n");
			}
		}
	protected:
		NV_ENCODE_API_FUNCTION_LIST* encodeAPI;
		CRITICAL_SECTION inputBuffersCriticalSection, outputBuffersCriticalSection;
		HANDLE outputSemaphore;
		void* encoder;
		std::queue<MyBuffer*> outputBuffers;
		std::queue<MyBuffer*> inputBuffers;
		FILE* outputFile;
		int nFrames = 0;
		std::shared_ptr<Context> context;
		CComPtr<ID3D11Texture2D> inputTexture;
		int width, height;
	};
}
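For reference, the explicit synchronization I was alluding to (and that the commented-out renderLock in Loop() hints at) would look something like the sketch below, assuming renderLock is a std::mutex on the shared Context (so this also needs <mutex>). Both the render thread's Draw/Present calls and the encode side's context use would have to take the same lock:

	// in Next(), on the render thread:
	{
		std::lock_guard<std::mutex> guard(context->renderLock); // assumed std::mutex
		context->d3d11DeviceContext->CopyResource((ID3D11Texture2D*)buffer->hInputBuffer, inputTexture);
	}

	// in Loop(), on the output thread:
	{
		std::lock_guard<std::mutex> guard(context->renderLock);
		nvStatus = encodeAPI->nvEncLockBitstream(encoder, &lockBitstream);
		fwrite(lockBitstream.bitstreamBufferPtr, 1, lockBitstream.bitstreamSizeInBytes, outputFile);
		nvStatus = encodeAPI->nvEncUnlockBitstream(encoder, buffer->hBitstreamBuffer);
	}

Since the output thread would then hold the lock across the whole bitstream copy, the render thread stalls for that long every frame, which is why I suspect this degenerates into effectively synchronous encoding.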