I want to create and destroy an ID3D11Buffer that is shared through CUDA External Resource Interoperability on every iteration of a loop, because the number of vertices is different in each iteration.
So I modified NVIDIA’s official simpleD3D11 sample as follows.
Note that, to keep the example simple, the number of vertices does not actually change here.
// Creates a shared keyed-mutex vertex buffer, imports the buffer and its keyed
// mutex into CUDA, fills the buffer with the sine-wave kernel, draws it, and
// then releases every CUDA and D3D11 object created during this call.
//
// Returns S_OK on success, or the failing HRESULT when a shared handle cannot
// be obtained. Buffer creation and interface queries are checked with
// AssertOrQuit.
HRESULT Render()
{
    /*static */uint64_t key = 0;
    ID3D11Buffer* l_VertexBuffer = NULL;
    IDXGIKeyedMutex* l_pKeyedMutex11 = NULL;
    IDXGIResource1* pResource = NULL;
    HANDLE sharedHandle = NULL;
    Vertex* d_VertexBufPtr = NULL;
    cudaExternalMemory_t extMemory;
    cudaExternalSemaphore_t extSemaphore;
    HRESULT hr = S_OK;

    // Zero-initialise the descriptor so that fields which are not set below
    // (e.g. StructureByteStride) do not hold indeterminate values.
    D3D11_BUFFER_DESC bufferDesc = {};
    bufferDesc.Usage = D3D11_USAGE_DEFAULT;
    bufferDesc.ByteWidth = sizeof(Vertex) * g_WindowWidth * g_WindowHeight;
    bufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
    bufferDesc.CPUAccessFlags = 0;
    bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;

    hr = g_pd3dDevice->CreateBuffer(&bufferDesc, NULL, &l_VertexBuffer);
    AssertOrQuit(SUCCEEDED(hr));

    // The keyed mutex interface is dereferenced below, so its query must succeed.
    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIKeyedMutex), (void**)&l_pKeyedMutex11);
    AssertOrQuit(SUCCEEDED(hr));

    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        // Do not hand an invalid handle to CUDA; release what was created and bail out.
        pResource->Release();
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Vertex Buffer into CUDA
    d_VertexBufPtr = cudaImportVertexBuffer(sharedHandle, extMemory, g_WindowWidth, g_WindowHeight);
    pResource->Release();
    pResource = NULL;

    hr = l_pKeyedMutex11->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        // Undo the vertex buffer import before releasing the D3D11 objects.
        pResource->Release();
        checkCudaErrors(cudaFree(d_VertexBufPtr));
        checkCudaErrors(cudaDestroyExternalMemory(extMemory));
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Keyed Mutex into CUDA
    cudaImportKeyedMutex(sharedHandle, extSemaphore);
    pResource->Release();

    // Launch cuda kernel to generate sinewave in vertex buffer
    // NOTE(review): g_WindowWidth is passed for both size arguments, while the
    // buffer above is sized with g_WindowWidth * g_WindowHeight. Confirm whether
    // the second argument should be g_WindowHeight.
    RunSineWaveKernel(extSemaphore, key, INFINITE, g_WindowWidth, g_WindowWidth, d_VertexBufPtr, cuda_stream);

    // Draw the scene using them
    DrawScene(key, l_VertexBuffer, l_pKeyedMutex11);

    // Tear down the CUDA side first, then drop the D3D11 references.
    checkCudaErrors(cudaFree(d_VertexBufPtr));
    checkCudaErrors(cudaDestroyExternalMemory(extMemory));
    checkCudaErrors(cudaDestroyExternalSemaphore(extSemaphore));
    l_pKeyedMutex11->Release();
    l_VertexBuffer->Release();

    // The function is declared to return HRESULT; falling off the end is undefined behaviour.
    return hr;
}
The whole runnable source code is here.
This code causes huge memory leaks: none of the ID3D11Buffer objects are released.
If I comment out all the CUDA External Resource Interoperability-related functions like the following, there is no memory leak.
// Experiment variant with every CUDA External Resource Interoperability call
// commented out: creates a shared keyed-mutex vertex buffer, queries its shared
// handles, draws it, and releases the D3D11 objects again.
//
// Returns S_OK on success, or the failing HRESULT when a shared handle cannot
// be obtained. Buffer creation and interface queries are checked with
// AssertOrQuit.
HRESULT Render()
{
    /*static */uint64_t key = 0;
    ID3D11Buffer* l_VertexBuffer = NULL;
    IDXGIKeyedMutex* l_pKeyedMutex11 = NULL;
    IDXGIResource1* pResource = NULL;
    HANDLE sharedHandle = NULL;
    // The three CUDA-side locals are only referenced by the commented-out calls
    // below; they are kept so those calls can be re-enabled without other edits.
    Vertex* d_VertexBufPtr = NULL;
    cudaExternalMemory_t extMemory;
    cudaExternalSemaphore_t extSemaphore;
    HRESULT hr = S_OK;

    // Zero-initialise the descriptor so that fields which are not set below
    // (e.g. StructureByteStride) do not hold indeterminate values.
    D3D11_BUFFER_DESC bufferDesc = {};
    bufferDesc.Usage = D3D11_USAGE_DEFAULT;
    bufferDesc.ByteWidth = sizeof(Vertex) * g_WindowWidth * g_WindowHeight;
    bufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
    bufferDesc.CPUAccessFlags = 0;
    bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;

    hr = g_pd3dDevice->CreateBuffer(&bufferDesc, NULL, &l_VertexBuffer);
    AssertOrQuit(SUCCEEDED(hr));

    // The keyed mutex interface is dereferenced below, so its query must succeed.
    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIKeyedMutex), (void**)&l_pKeyedMutex11);
    AssertOrQuit(SUCCEEDED(hr));

    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        // Release what was created and report the failure to the caller.
        pResource->Release();
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Vertex Buffer into CUDA
    // d_VertexBufPtr = cudaImportVertexBuffer(sharedHandle, extMemory, g_WindowWidth, g_WindowHeight);
    pResource->Release();
    pResource = NULL;

    hr = l_pKeyedMutex11->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        pResource->Release();
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Keyed Mutex into CUDA
    // cudaImportKeyedMutex(sharedHandle, extSemaphore);
    pResource->Release();

    // Launch cuda kernel to generate sinewave in vertex buffer
    // RunSineWaveKernel(extSemaphore, key, INFINITE, g_WindowWidth, g_WindowWidth, d_VertexBufPtr, cuda_stream);

    // Draw the scene using them
    DrawScene(key, l_VertexBuffer, l_pKeyedMutex11);

    // checkCudaErrors(cudaFree(d_VertexBufPtr));
    // checkCudaErrors(cudaDestroyExternalMemory(extMemory));
    // checkCudaErrors(cudaDestroyExternalSemaphore(extSemaphore));
    l_pKeyedMutex11->Release();
    l_VertexBuffer->Release();

    // The function is declared to return HRESULT; falling off the end is undefined behaviour.
    return hr;
}
The whole runnable source code is here.
And if I revert only cudaExternalSemaphore_t related functions like the following, there are huge memory leaks again.
// Experiment variant in which only the cudaExternalSemaphore_t calls are
// enabled: creates a shared keyed-mutex vertex buffer, imports just its keyed
// mutex into CUDA, draws the buffer, destroys the imported semaphore, and
// releases the D3D11 objects. The vertex buffer import and the kernel launch
// remain commented out.
//
// Returns S_OK on success, or the failing HRESULT when a shared handle cannot
// be obtained. Buffer creation and interface queries are checked with
// AssertOrQuit.
HRESULT Render()
{
    /*static */uint64_t key = 0;
    ID3D11Buffer* l_VertexBuffer = NULL;
    IDXGIKeyedMutex* l_pKeyedMutex11 = NULL;
    IDXGIResource1* pResource = NULL;
    HANDLE sharedHandle = NULL;
    // d_VertexBufPtr and extMemory are only referenced by the commented-out
    // calls below; they are kept so those calls can be re-enabled as-is.
    Vertex* d_VertexBufPtr = NULL;
    cudaExternalMemory_t extMemory;
    cudaExternalSemaphore_t extSemaphore;
    HRESULT hr = S_OK;

    // Zero-initialise the descriptor so that fields which are not set below
    // (e.g. StructureByteStride) do not hold indeterminate values.
    D3D11_BUFFER_DESC bufferDesc = {};
    bufferDesc.Usage = D3D11_USAGE_DEFAULT;
    bufferDesc.ByteWidth = sizeof(Vertex) * g_WindowWidth * g_WindowHeight;
    bufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
    bufferDesc.CPUAccessFlags = 0;
    bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX;

    hr = g_pd3dDevice->CreateBuffer(&bufferDesc, NULL, &l_VertexBuffer);
    AssertOrQuit(SUCCEEDED(hr));

    // The keyed mutex interface is dereferenced below, so its query must succeed.
    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIKeyedMutex), (void**)&l_pKeyedMutex11);
    AssertOrQuit(SUCCEEDED(hr));

    hr = l_VertexBuffer->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        // Release what was created and report the failure to the caller.
        pResource->Release();
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Vertex Buffer into CUDA
    // d_VertexBufPtr = cudaImportVertexBuffer(sharedHandle, extMemory, g_WindowWidth, g_WindowHeight);
    pResource->Release();
    pResource = NULL;

    hr = l_pKeyedMutex11->QueryInterface(__uuidof(IDXGIResource1), (void**)&pResource);
    AssertOrQuit(SUCCEEDED(hr));

    hr = pResource->GetSharedHandle(&sharedHandle);
    if (FAILED(hr))
    {
        std::cout << "Failed GetSharedHandle hr= " << hr << std::endl;
        // Do not hand an invalid handle to CUDA; nothing has been imported yet.
        pResource->Release();
        l_pKeyedMutex11->Release();
        l_VertexBuffer->Release();
        return hr;
    }

    // Import the D3D11 Keyed Mutex into CUDA
    cudaImportKeyedMutex(sharedHandle, extSemaphore);
    pResource->Release();

    // Launch cuda kernel to generate sinewave in vertex buffer
    // RunSineWaveKernel(extSemaphore, key, INFINITE, g_WindowWidth, g_WindowWidth, d_VertexBufPtr, cuda_stream);

    // Draw the scene using them
    DrawScene(key, l_VertexBuffer, l_pKeyedMutex11);

    // checkCudaErrors(cudaFree(d_VertexBufPtr));
    // checkCudaErrors(cudaDestroyExternalMemory(extMemory));
    // Destroy the imported semaphore before dropping the D3D11 references.
    checkCudaErrors(cudaDestroyExternalSemaphore(extSemaphore));
    l_pKeyedMutex11->Release();
    l_VertexBuffer->Release();

    // The function is declared to return HRESULT; falling off the end is undefined behaviour.
    return hr;
}
The whole runnable source code is here.
So I suspect that cudaImportExternalSemaphore and/or cudaDestroyExternalSemaphore do not handle IDXGIKeyedMutex correctly.
I tried calling l_pKeyedMutex11->Release(); twice, but that caused a runtime error.
In my case, there is no memory leak with the old CUDA Graphics Interoperability API. However, I’ve read in an official slide that the new CUDA External Resource Interoperability API is much faster than the old one, so I’d like to use the new one.
By the way, I suspect that NVIDIA’s official simpleD3D11 sample forgets to call g_pKeyedMutex11->Release(); in CleanUp(), which would cause another memory leak.
Any comments would be very welcome.
Thank you.