Hi, I am new to CUDA programming and I am trying to write a simple program that uses Direct3D interoperability with CUDA through the Driver API.
I am getting the error CUDA_ERROR_INVALID_VALUE from the function cuD3D9ResourceGetMappedPointer(CUdeviceptr *pDevPtr, IDirect3DResource9 *pResource, U32 face, U32 level).
When I looked this error up in the CUDA Reference Manual, it says that the error occurs when the face and level arguments are invalid. I am passing face = 0 and level = 0 as the Reference Manual specifies, but I still get the error.
I am using a D3D vertex buffer object. My cuD3D9MapResources(1, (IDirect3DResource9**)&positionsVB) call returns CUDA_SUCCESS, which implies that there is no error in mapping the resource to the CUDA context, but there is an error in getting the mapped resource pointer.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// std::cout is used for all diagnostics in this file
#include <iostream>
// NOTE(review): the original #ifdef _WIN32 was never closed; <sys/time.h> is
// assumed to be the non-Windows alternative -- confirm.
#ifdef _WIN32
#include<windows.h>
#include <time.h>
#else
#include <sys/time.h>
#endif
// includes CUDA
#include<d3d9.h>
#include <cuda.h>
#include<cudaD3D9.h>
// includes project
#include <cutil_inline.h>
#include "matrixMul.h"
#define NULL 0
// ---------------------------------------------------------------------------
// Forward declarations of the per-frame and teardown routines defined below.
// ---------------------------------------------------------------------------
void Render();
void releaseVB();

// ---------------------------------------------------------------------------
// File-scope state shared by main(), Render() and releaseVB().
// ---------------------------------------------------------------------------

// Direct3D objects and the status of the most recent Direct3D call.
HRESULT               result = D3D_OK;
LPDIRECT3D9           D3D    = NULL;
IDirect3DDevice9*     device;
D3DPRESENT_PARAMETERS d3dpp;

// Handle of the kernel fetched from the loaded module; launched by Render().
CUfunction matrixMul_kernel;

// Edge length, in pixels, used for both dimensions of the created window.
unsigned int _fractalSize = 512;

// Dimensions passed to the kernel and used to derive the launch grid.
unsigned int width  = 16;
unsigned int height = 16;

// Back-buffer dimensions used when filling in the present parameters.
const unsigned int g_WindowWidth  = 512;
const unsigned int g_WindowHeight = 512;

// Mesh dimensions and the derived vertex count.
const unsigned int g_MeshWidth   = 256;
const unsigned int g_MeshHeight  = 256;
const unsigned int g_NumVertices = g_MeshWidth * g_MeshHeight;

// Miscellaneous flags; not read by any code visible in this file.
bool g_bWindowed       = true;
bool g_bDeviceLost     = false;
bool g_bQAReadback     = false;
int  g_iFrameToCompare = 10;
/**
 * Window procedure for the application window.
 *
 * - WM_PAINT: validates the window with an empty BeginPaint/EndPaint pair
 *   and returns 0 without forwarding the message.
 * - WM_KEYDOWN with VK_ESCAPE, or WM_NCDESTROY: posts a quit message, then
 *   forwards the message to the default handler.
 * - Anything else: forwarded to DefWindowProc unchanged.
 *
 * @param hWnd    window receiving the message
 * @param msg     message identifier
 * @param wParam  first message parameter (virtual-key code for WM_KEYDOWN)
 * @param lParam  second message parameter
 * @return 0 for WM_PAINT, otherwise the result of DefWindowProc
 */
LRESULT CALLBACK WindowProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
    if (msg == WM_PAINT)
    {
        PAINTSTRUCT paint;
        BeginPaint(hWnd, &paint);
        EndPaint(hWnd, &paint);
        return 0;
    }

    const bool escapePressed = (msg == WM_KEYDOWN) && (wParam == VK_ESCAPE);
    const bool windowGone    = (msg == WM_NCDESTROY);
    if (escapePressed || windowGone)
    {
        PostQuitMessage(0);
    }

    return DefWindowProc(hWnd, msg, wParam, lParam);
}
// One vertex of the quad: a 3D position followed by one 2D texture coordinate.
struct Vertex
{
    float x, y, z;   // position
    float tu, tv;    // texture coordinate
};

// The four corner vertices copied into the Direct3D vertex buffer.
// This must be an array: it is initialised with four rows, and main() relies
// on sizeof(g_quadVertices) covering all four vertices and on the name
// decaying to a pointer for memcpy. Declaring it as a single Vertex (as
// before) does not compile.
Vertex g_quadVertices[] =
{
    {-1.0f, -1.0f, 0.5f, 0.0f, 0.0f},
    { 1.0f, -1.0f, 0.5f, 1.0f, 0.0f},
    {-1.0f,  1.0f, 0.5f, 0.0f, 1.0f},
    { 1.0f,  1.0f, 0.5f, 1.0f, 1.0f},
};
LPDIRECT3DVERTEXBUFFER9 positionsVB;
int main()
{
CUresult resu;
//CUdevice dev;
//////////////////////////////////////////////////////////////////////////
// Create a window
//////////////////////////////////////////////////////////////////////////
WNDCLASSEX winClass = {0};
MSG uMsg;
memset(&uMsg,0,sizeof(uMsg));
winClass.lpszClassName = "MY_WINDOWS_CLASS";
winClass.cbSize = sizeof(WNDCLASSEX);
winClass.style = CS_HREDRAW | CS_VREDRAW;
winClass.lpfnWndProc = WindowProc;
winClass.hIcon = NULL;
winClass.hIconSm = NULL;
winClass.hCursor = LoadCursor(NULL, IDC_ARROW);
winClass.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH);
winClass.lpszMenuName = NULL;
winClass.cbClsExtra = 0;
winClass.cbWndExtra = 0;
if( !RegisterClassEx(&winClass) )
return E_FAIL;
HWND hWnd = CreateWindowEx( NULL, “MY_WINDOWS_CLASS”,
"D3D Interoperability ",
WS_OVERLAPPEDWINDOW | WS_VISIBLE,
0, 0, _fractalSize, _fractalSize, NULL, NULL, NULL, NULL );
if(hWnd == NULL)
{
std::cout<<"Failed to create a window here";
return(1);
}
ShowWindow(hWnd, SW_SHOWNORMAL );
UpdateWindow(hWnd );
resu = cuInit(0);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Error in Driver API Initialization \n";
}
D3D = Direct3DCreate9(D3D_SDK_VERSION);
// Initialize Direct3D
if(D3D == NULL)
std::cout<<"Error: Could not Create D3D device \n";
// Get a CUDA capable adapter
unsigned int adapter = 0;
unsigned int count = D3D->GetAdapterCount();
for (; adapter < D3D->GetAdapterCount(); adapter++)
{
D3DADAPTER_IDENTIFIER9 adapterId;
D3D->GetAdapterIdentifier(adapter, 0, &adapterId);
char *name = adapterId.DeviceName;
int dev;
resu = cuD3D9GetDevice(&dev, adapterId.DeviceName);
if(resu == CUDA_SUCCESS)
break;
}
ZeroMemory(&d3dpp ,sizeof(d3dpp));
// Set up the structure used to create the D3DDevice
d3dpp.Windowed = TRUE;
d3dpp.BackBufferCount = 1;
//d3dpp.hDeviceWindow = hWnd;
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
d3dpp.BackBufferFormat = D3DFMT_UNKNOWN;
//d3dpp.FullScreen_RefreshRateInHz = 60;
d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
d3dpp.EnableAutoDepthStencil = FALSE;
d3dpp.BackBufferWidth = g_WindowWidth;
d3dpp.BackBufferHeight = g_WindowHeight;
// Create device
result = D3D->CreateDevice(adapter, D3DDEVTYPE_HAL, hWnd, D3DCREATE_HARDWARE_VERTEXPROCESSING, &d3dpp, &device);
if(result != D3D_OK)
{
std::cout<<"Failed to Create Device \n";
return(1);
}
// Initialize driver API
// Create context
CUcontext cuContext;
CUdevice cudevice;
resu = cuD3D9CtxCreate(&cuContext, &cudevice, 1, device);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Context not Created \n";
}
// Create module from binary file
CUmodule cuModule;
const char* mod_name = “data/matrixMul_kernel.cubin”;
resu = cuModuleLoad(&cuModule, mod_name);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Error in Loading the module \n";
}
// Get function handle from module
resu = cuModuleGetFunction(&matrixMul_kernel, cuModule, “matrixMul_kernel”);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Error in Getting the Function from Module\n";
}
///////////////////////////////////////////////////////////////////////////
// Store geometry to be rendered
///////////////////////////////////////////////////////////////////////////
// Create vertex buffer and register it with CUDA
unsigned int size = 4* sizeof(Vertex);
result = device->CreateVertexBuffer(size, 0,D3DFMT_VERTEXDATA, D3DPOOL_DEFAULT, &positionsVB, NULL);
if(result!= D3D_OK)
{
std::cout<<"Failed to create vertex buffer\n";
}
void *pVertices = NULL;
positionsVB->Lock( 0, sizeof(g_quadVertices), (void**)&pVertices, 0 );
memcpy( pVertices, g_quadVertices, sizeof(g_quadVertices) );
positionsVB->Unlock();
resu = cuD3D9RegisterResource(positionsVB, CU_D3D9_REGISTER_FLAGS_NONE);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Error in Registering Resources\n";
}
resu = cuD3D9ResourceSetMapFlags(positionsVB, CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD);
if(resu != CUDA_SUCCESS)
{
std::cout<<"Error in Setting ResourceMAp flags";
}
// Launch rendering loop
unsigned int temp =0;
while (temp<100)
{
Render();
temp++;
}
}
void Render()
{
CUresult res;
// Map vertex buffer for writing from CUDA
CUdeviceptr *positions = NULL;
res = cuD3D9MapResources(1, (IDirect3DResource9**)&positionsVB);
if(res != CUDA_SUCCESS)
{
std::cout<<"Error in Mapping Resources \n";
}
[b]res = cuD3D9ResourceGetMappedPointer(positions,(IDirect3DResource9
*)positionsVB, 0, 0);[/b] //Error at this point of the file res returns CUDA_ERROR_INVALID_VALUE
if(res != CUDA_SUCCESS)
{
std::cout<<"Error in Getting Mapped Pointer :"<<res<<"\n";
}
size_t pitch;
unsigned int pitchslice;
res = cuD3D9ResourceGetMappedPitch(&pitch, &pitchslice, positionsVB, 0, 0);
if(res !=CUDA_SUCCESS)
{
std::cout<<"Error in Getting Mapped Pitch \n";
}
// Execute kernel
int offset = 0;
cuParamSeti(matrixMul_kernel, offset, *positions);
offset += sizeof(positions);
cuParamSeti(matrixMul_kernel, offset, width);
offset += sizeof(width);
cuParamSeti(matrixMul_kernel, offset, height);
offset += sizeof(height);
cuParamSetf(matrixMul_kernel, offset, pitch);
offset += sizeof(pitch);
cuParamSetSize(matrixMul_kernel, offset);
int threadsPerBlock = 16;
cuFuncSetBlockShape(matrixMul_kernel, threadsPerBlock, threadsPerBlock, 1);
cuLaunchGrid(matrixMul_kernel, width / threadsPerBlock, height / threadsPerBlock);
// Unmap vertex buffer
cuD3D9UnmapResources(1, (IDirect3DResource9**)&positionsVB);
releaseVB();
}
void releaseVB()
{
cuD3D9UnregisterResource(positionsVB);
positionsVB->Release();
device->Release();
D3D->Release();
}
Could someone please point out what the error in my code is?
Thanks in advance :mellow: