#include #include #include #include #include #include #include #include #include namespace { const D3D_FEATURE_LEVEL kRequestedD3DFeatureLevel = D3D_FEATURE_LEVEL_11_1; } void Thread1(std::mutex *textureMutex) { textureMutex->lock(); // Do some work. std::cout << "Thread 1 iteration" << std::endl; std::this_thread::sleep_for(std::chrono::milliseconds(700)); textureMutex->unlock(); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } void Thread2(ID3D11Device *d3dDevice, ID3D11Texture2D *texture, std::mutex *textureMutex) { // Clone D3D device. IDXGIDevice *dxgiDevice = nullptr; IDXGIAdapter *dxgiAdapter = nullptr; d3dDevice->QueryInterface(&dxgiDevice); dxgiDevice->GetAdapter(&dxgiAdapter); ID3D11Device *privateD3dDevice = nullptr; D3D11CreateDevice(dxgiAdapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, #ifdef _DEBUG D3D11_CREATE_DEVICE_BGRA_SUPPORT | D3D11_CREATE_DEVICE_DEBUG, #else D3D11_CREATE_DEVICE_BGRA_SUPPORT, #endif &kRequestedD3DFeatureLevel, 1, D3D11_SDK_VERSION, &privateD3dDevice, nullptr, nullptr); dxgiAdapter->Release(); dxgiAdapter = nullptr; dxgiDevice->Release(); dxgiDevice = nullptr; // Get a private texture alias. IDXGIResource *dxgiResource = nullptr; texture->QueryInterface(&dxgiResource); HANDLE textureSharedHandle = nullptr; dxgiResource->GetSharedHandle(&textureSharedHandle); ID3D11Texture2D *privateTexture = nullptr; privateD3dDevice->OpenSharedResource( textureSharedHandle, __uuidof(ID3D11Texture2D), reinterpret_cast(&privateTexture)); dxgiResource->Release(); dxgiResource = nullptr; // Register the texture for use in CUDA. cudaGraphicsResource_t textureAsCUDAResource = nullptr; cudaGraphicsD3D11RegisterResource( &textureAsCUDAResource, privateTexture, cudaGraphicsRegisterFlagsSurfaceLoadStore); // Create a private CUDA stream. cudaStream_t cudaStream = nullptr; cudaStreamCreateWithFlags(&cudaStream, cudaStreamNonBlocking); while (true) { textureMutex->lock(); cudaGraphicsMapResources(1, &textureAsCUDAResource, cudaStream); // Do some work. std::cout << "Thread 2 iteration" << std::endl; std::this_thread::sleep_for(std::chrono::seconds(1)); cudaStreamSynchronize(cudaStream); cudaGraphicsUnmapResources(1, &textureAsCUDAResource, cudaStream); textureMutex->unlock(); std::this_thread::sleep_for(std::chrono::milliseconds(150)); } cudaStreamDestroy(cudaStream); cudaStream = nullptr; cudaGraphicsUnregisterResource(textureAsCUDAResource); textureAsCUDAResource = nullptr; privateTexture->Release(); privateD3dDevice->Release(); } int main(int const, char const *const[]) { // Create D3D device. ID3D11Device *d3dDevice = nullptr; ID3D11DeviceContext *d3dContext = nullptr; D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, #ifdef _DEBUG D3D11_CREATE_DEVICE_BGRA_SUPPORT | D3D11_CREATE_DEVICE_DEBUG, #else D3D11_CREATE_DEVICE_BGRA_SUPPORT, #endif &kRequestedD3DFeatureLevel, 1, D3D11_SDK_VERSION, &d3dDevice, nullptr, &d3dContext); // Create texture. D3D11_TEXTURE2D_DESC textureDescription = {}; textureDescription.Width = 640; textureDescription.Height = 480; textureDescription.MipLevels = 1; textureDescription.ArraySize = 1; textureDescription.Format = DXGI_FORMAT_B8G8R8A8_UNORM; textureDescription.SampleDesc.Count = 1; textureDescription.SampleDesc.Quality = 0; textureDescription.Usage = D3D11_USAGE_DEFAULT; textureDescription.BindFlags = D3D11_BIND_RENDER_TARGET; textureDescription.CPUAccessFlags = 0; textureDescription.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX; ID3D11Texture2D *texture = nullptr; d3dDevice->CreateTexture2D(&textureDescription, nullptr, &texture); // Create CUDA graph. std::mutex textureMutex; cudaGraph_t graph = nullptr; cudaGraphCreate(&graph, 0); cudaHostNodeParams hostNodeParams = {}; hostNodeParams.fn = reinterpret_cast(Thread1); hostNodeParams.userData = &textureMutex; cudaGraphNode_t graphNode; cudaGraphAddHostNode(&graphNode, graph, nullptr, 0, &hostNodeParams); cudaGraphExec_t execGraph = nullptr; cudaGraphInstantiate(&execGraph, graph, nullptr, nullptr, 0); // Create a private CUDA stream. cudaStream_t cudaStream = nullptr; cudaStreamCreateWithFlags(&cudaStream, cudaStreamNonBlocking); // Launch thread that calls std::mutex::lock() and cudaGraphicsMapResources(). const auto kThread2 = std::thread(std::bind(Thread2, d3dDevice, texture, &textureMutex)); std::this_thread::sleep_for(std::chrono::seconds(2)); // Wait for thread to complete its initialization. while (true) { // Launch function that only calls AcquireSync()/ReleaseSync(). #if 0 // Setting this to 1 will make the example work. Thread1(&textureMutex); #else cudaGraphLaunch(execGraph, cudaStream); cudaStreamSynchronize(cudaStream); #endif } cudaStreamDestroy(cudaStream); cudaStream = nullptr; cudaGraphExecDestroy(execGraph); execGraph = nullptr; cudaGraphDestroy(graph); graph = nullptr; texture->Release(); d3dContext->Release(); d3dDevice->Release(); return 0; }