Hi all,
I have a strange problem when accessing device memory.
This is the source code:
// Device buffer for the result image: allocated with cudaMalloc in
// fn_InitObject, passed as the first kernel argument in call_kernels, and
// copied back to the host there. Not freed anywhere in the code shown.
unsigned int* g_image = NULL;
// Device buffer for the input slice: allocated with cudaMalloc in
// fn_InitObject, filled from host memory by copy_slice, and passed as the
// second kernel argument in call_kernels.
short* g_buffer = NULL;
// Uploads one 512x512 slice of 16-bit samples from host memory into the
// device buffer g_buffer.
//
// _host_memory: host pointer to at least 512*512 short values.
//
// The byte count matches the size fn_InitObject allocates for g_buffer, so
// fn_InitObject must have succeeded before this is called. The call is a
// no-op when either pointer is NULL.
// NOTE(review): a cudaErrorInvalidDevicePointer from this copy presumably means
// g_buffer was allocated in a different CUDA context (e.g. by another host
// thread on older runtimes) -- confirm against the calling code.
extern "C"
void copy_slice(short* _host_memory)
{
    // Nothing sensible can be copied without both a source and a destination.
    if (_host_memory == NULL || g_buffer == NULL)
        return;

    cudaMemcpy(g_buffer, _host_memory, 512 * 512 * sizeof(short), cudaMemcpyHostToDevice);
}
// Allocates the two device buffers used by copy_slice and call_kernels:
// g_image (512*512 unsigned int) and g_buffer (512*512 short).
//
// _rows, _colomns: currently unused; the buffer size is fixed at 512x512
//                  because copy_slice and call_kernels hard-code that size.
//
// Returns true when both allocations succeed. On failure returns false,
// releases anything allocated by this call and leaves g_image / g_buffer
// unchanged, so no device memory is leaked and no half-initialised state is
// published.
extern "C"
bool fn_InitObject(int _rows, int _colomns)
{
    (void)_rows;     // NOTE(review): dimensions are ignored -- confirm callers
    (void)_colomns;  // always pass 512 x 512.

    unsigned int* image = NULL;
    short* buffer = NULL;

    if (cudaMalloc((void**)&image, 512 * 512 * sizeof(unsigned int)) != cudaSuccess)
        return false;

    if (cudaMalloc((void**)&buffer, 512 * 512 * sizeof(short)) != cudaSuccess)
    {
        // Do not leak the first buffer when the second allocation fails.
        cudaFree(image);
        return false;
    }

    // Publish the pointers only once both allocations are known to be good.
    g_image = image;
    g_buffer = buffer;
    return true;
}
// Runs render_vr_cuda_kernel on (g_image, g_buffer), waits for it to finish
// and copies the 512*512 unsigned int result image into _host_result.
//
// _host_result: host pointer to at least 512*512 32-bit values.
//
// Launch layout: 64x8 blocks of 8x64 threads, i.e. 512 threads in x and 512
// threads in y in total.
//
// The function returns without touching _host_result when a pointer is NULL,
// when the events cannot be created, or when the kernel launch or the wait
// for its completion reports an error; previously a failed launch still
// copied whatever happened to be in g_image.
extern "C"
void call_kernels(UINT* _host_result)
{
    if (_host_result == NULL || g_image == NULL || g_buffer == NULL)
        return;

    dim3 dimBlock(8, 64);
    dim3 dimGrid(64, 8);

    // Events bracket the kernel; waiting on 'stop' blocks until it has finished.
    cudaEvent_t start, stop;
    if (cudaEventCreate(&start) != cudaSuccess)
        return;
    if (cudaEventCreate(&stop) != cudaSuccess)
    {
        cudaEventDestroy(start);
        return;
    }

    cudaEventRecord(start, 0);

    // Launch the kernel.
    render_vr_cuda_kernel<<<dimGrid, dimBlock>>>(g_image, g_buffer);

    // A launch does not return a status; launch errors are read back here.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        cudaEventRecord(stop, 0);
        // Errors raised while the kernel runs surface at this synchronisation.
        status = cudaEventSynchronize(stop);
    }

    // The events are destroyed on every path so they are never leaked.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    if (status != cudaSuccess)
        return;

    // Copy the result image back to the host buffer.
    cudaMemcpy(_host_result, g_image, 512 * 512 * sizeof(unsigned int), cudaMemcpyDeviceToHost);
}
The cudaMemcpy function returns a cudaErrorInvalidDevicePointer error! But when I allocate the device memory (with cudaMalloc) in the "copy_slice" function, cudaMemcpy returns success.
Of course, I call fn_InitObject before copy_slice, and I have no asynchronous problem.
When I use cudaMalloc and cudaMemcpy in the same function I get a successful result, but the kernel obtains erroneous values from the device memory (g_buffer) when it runs (call_kernels function).
Thank you for your help.