Hey,
I'm working on a video decoder project where I need to map parts of a program from the CPU to the GPU. I've managed to get a working kernel, but it isn't optimised yet: it copies the result of the last step back to host memory, from where it is sent back to the device to be shown on screen :D
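To make that round trip concrete, it currently looks roughly like this per frame (d_result, h_frame and frameBytes are hypothetical names, not the real variables):

// Unoptimised path: copy the kernel's output back to the host...
cudaMemcpy(h_frame, d_result, frameBytes, cudaMemcpyDeviceToHost);
// ...then upload it again (e.g. into a D3D9 texture) just to display it.
// The point of the interop work is to cut this copy out entirely.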
Anyway, the code I started from was a C++ project, and I tried to add the extra parts, but when I call CUDA_SAFE_CALL(cudaD3D9Begin(m_device)); I get: The program '[1056] Application.exe: Native' has exited with code 1 (0x1).
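If I remember the cutil macros right, CUT_DEVICE_INIT and CUDA_SAFE_CALL print to stderr and then call exit(EXIT_FAILURE), which is where the bare "exited with code 1" comes from; in a GUI app the stderr message is easy to miss. Unwrapping the call shows the actual error, something like:

cudaError_t err = cudaD3D9Begin(m_device);
if (err != cudaSuccess)
{
    // Print why the interop handshake failed instead of silently exiting.
    fprintf(stderr, "cudaD3D9Begin: %s\n", cudaGetErrorString(err));
}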
I'll paste some code below, from main.cpp:
HRESULT InitD3D( HWND hWnd )
{
    // Create the D3D object, which is needed to create the D3DDevice.
    if( NULL == ( g_pD3D = Direct3DCreate9( D3D_SDK_VERSION ) ) )
        return E_FAIL;

    RECT m_rcWindowBounds;
    RECT m_rcWindowClient;
    GetWindowRect(hWnd, &m_rcWindowBounds);
    GetClientRect(hWnd, &m_rcWindowClient);
    int width  = m_rcWindowClient.right  - m_rcWindowClient.left;
    int height = m_rcWindowClient.bottom - m_rcWindowClient.top;

    D3DPRESENT_PARAMETERS d3dpp;
    ZeroMemory( &d3dpp, sizeof(d3dpp) );
    d3dpp.Windowed                   = TRUE;
    d3dpp.BackBufferCount            = 1;
    d3dpp.SwapEffect                 = D3DSWAPEFFECT_DISCARD;
    d3dpp.hDeviceWindow              = hWnd;
    d3dpp.BackBufferWidth            = width;
    d3dpp.BackBufferHeight           = height;
    d3dpp.BackBufferFormat           = D3DFMT_A8R8G8B8;
    d3dpp.FullScreen_RefreshRateInHz = 0;
    d3dpp.PresentationInterval       = D3DPRESENT_INTERVAL_IMMEDIATE;

    // Use the NVPerfHUD adapter (as a REF device) if it is present.
    UINT AdapterToUse = D3DADAPTER_DEFAULT;
    D3DDEVTYPE DeviceType = D3DDEVTYPE_HAL;
    for (UINT Adapter = 0; Adapter < g_pD3D->GetAdapterCount(); Adapter++)
    {
        D3DADAPTER_IDENTIFIER9 id;
        HRESULT res = g_pD3D->GetAdapterIdentifier(Adapter, 0, &id);
        if (strcmp(id.Description, "NVIDIA NVPerfHUD") == 0)
        {
            AdapterToUse = Adapter;
            DeviceType   = D3DDEVTYPE_REF;
            break;
        }
    }

    // Try hardware vertex processing first, fall back to software.
    if( FAILED( g_pD3D->CreateDevice( AdapterToUse, DeviceType, hWnd,
                                      D3DCREATE_HARDWARE_VERTEXPROCESSING,
                                      &d3dpp, &g_pd3dDevice ) ) )
    {
        if( FAILED( g_pD3D->CreateDevice( AdapterToUse, DeviceType, hWnd,
                                          D3DCREATE_SOFTWARE_VERTEXPROCESSING,
                                          &d3dpp, &g_pd3dDevice ) ) )
        {
            return E_FAIL;
        }
    }

    D3DCAPS9 caps;
    g_pD3D->GetDeviceCaps(AdapterToUse, DeviceType, &caps);
    // Non-zero if the device supports dynamic textures (bit-test with &).
    int dynamicTexture = (caps.Caps2 & D3DCAPS2_DYNAMICTEXTURES);

    g_viewImage = NULL;
    g_videoDecoder->profile = g_profile;
    g_displayController = new CDisplayController(width, height, g_pd3dDevice, hWnd,
                                                 g_videoDecoder->getVideoMemory(), g_state);
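One thing I'm only guessing at, so treat it as an assumption: CUDA's D3D9 interop needs a HAL device on the CUDA-capable adapter, so if the NVPerfHUD branch above fires and the device comes up as D3DDEVTYPE_REF, cudaD3D9Begin has nothing to attach to. On toolkits that ship cudaD3D9GetDevice, the chosen adapter can be matched to a CUDA device explicitly, roughly like this:

// Sketch: select the CUDA device that backs the chosen D3D9 adapter
// (assumes a toolkit that exposes cudaD3D9GetDevice).
D3DADAPTER_IDENTIFIER9 id;
g_pD3D->GetAdapterIdentifier(AdapterToUse, 0, &id);
int cudaDev = 0;
if (cudaD3D9GetDevice(&cudaDev, id.DeviceName) == cudaSuccess)
    cudaSetDevice(cudaDev);   // must run before any other CUDA calls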
Now I enter the CDisplayController constructor…
{
    m_GPUstate = GPUstate;
    m_width    = width;
    m_height   = height;
    m_widthUV  = width / 2;
    m_heightUV = height / 2;
    HRESULT hr;
    m_device = device;
    m_device->CreateTexture(width, height, 1, D3DUSAGE_RENDERTARGET,
                            D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &m_renderTexture, NULL);
    m_renderTexture->GetSurfaceLevel(0, &m_renderSurface);
    m_viewport.X      = 0;
    m_viewport.Y      = 0;
    m_viewport.Width  = width;
    m_viewport.Height = height;
    m_viewport.MinZ   = 0.0f;
    m_viewport.MaxZ   = 10.0f;
    m_hwnd = hwnd;

    // Build a full-window textured quad and copy it into a vertex buffer.
    VOID* pData;
    float m_w2 = (float)width / 2;
    float m_h2 = (float)height / 2;
    CUDAD3DVERTEXTEXTURE aQuad[] = { { -m_w2,  m_h2, 0.0f, 0.0f, 0.0f},
                                     { -m_w2, -m_h2, 0.0f, 0.0f, 1.0f},
                                     {  m_w2,  m_h2, 0.0f, 1.0f, 0.0f},
                                     {  m_w2, -m_h2, 0.0f, 1.0f, 1.0f} };
    m_device->CreateVertexBuffer(sizeof(aQuad), D3DUSAGE_WRITEONLY, VIEWIMAGEFVF,
                                 D3DPOOL_MANAGED, &m_imageVB, NULL);
    // Lock the full size of the quad data.
    m_imageVB->Lock(0, sizeof(aQuad), (void**)&pData, 0);
    memcpy(pData, aQuad, sizeof(aQuad));
    m_imageVB->Unlock();

    D3DXMatrixOrthoRH(&m_projection, (float)m_width, (float)m_height, 0, 10);
    D3DXMatrixIdentity(&m_modelview);
    device->GetRenderTarget(0, &m_backBuffer);
    // Note: this overwrites the texture/VB pointers created above.
    m_renderTexture = NULL;
    m_imageVB = NULL;
    m_videoMemory    = videoMemory;
    m_videoMemoryGPU = (CVideoMemoryGPU*)videoMemory;
    initCUDA(m_device);
initCUDA is defined in a separate .cu file (so it's called via extern "C"), and m_device is the g_pd3dDevice from main:
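For reference, the declaration on the C++ side would then be (my reconstruction, matching the definition below):

extern "C" void initCUDA(LPDIRECT3DDEVICE9 m_device);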
extern "C" void initCUDA(LPDIRECT3DDEVICE9 m_device)
{
CUT_DEVICE_INIT();
CUDA_SAFE_CALL(cudaD3D9Begin(m_device));
}
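And for where this is heading once cudaD3D9Begin succeeds: in this legacy (pre-CUDA-3.0) interop API, the next step is registering and mapping a D3D9 vertex buffer so a kernel can write straight into it, with no host round trip. Roughly, with pVB a hypothetical IDirect3DVertexBuffer9* created by the app:

// Legacy D3D9 interop flow, bracketed by cudaD3D9Begin/cudaD3D9End.
CUDA_SAFE_CALL(cudaD3D9RegisterVertexBuffer(pVB));
void* dptr = NULL;
CUDA_SAFE_CALL(cudaD3D9MapVertexBuffer(&dptr, pVB));
// ...launch a kernel that writes its output through dptr...
CUDA_SAFE_CALL(cudaD3D9UnmapVertexBuffer(pVB));
CUDA_SAFE_CALL(cudaD3D9UnregisterVertexBuffer(pVB));
// cudaD3D9End() undoes cudaD3D9Begin at shutdown.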
At that last line the application exits with code 1… (using breakpoints in Visual Studio, it doesn't get any further).
Sorry for pasting so much code; any help would be appreciated…
EDIT: styling and typo