The graphics card is a GT 9500. Although the canMapHostMemory field of the device properties is 0,
mapped memory can still be enabled by calling cudaSetDeviceFlags with cudaDeviceMapHost.
I need to provide a pointer that can be used to access host memory from host code, and that can also be passed to a kernel/device function to access the corresponding device memory.
Is there any way to have such a mapped pointer?
The problem is that we still have to pass the device pointer, not the host pointer, to the device function.
My idea is to redirect
the address of the device pointer to host memory, so that we can access it
the same way we access host memory.
I would like to be able to write code like this:
/*
 * Allocate `size` bytes of page-locked host memory that is mapped into the
 * device address space (cudaHostAllocMapped) and write-combined
 * (cudaHostAllocWriteCombined), then return the pointer produced for it by
 * getRedirectedDevicePointer().
 *
 * NOTE(review): getRedirectedDevicePointer() is NOT a CUDA runtime function.
 * It is the hypothetical API this post asks for: a device pointer that the
 * host can also dereference. The closest real call is
 * cudaHostGetDevicePointer(&pDevice, pHost, 0), whose result is only
 * guaranteed to be usable from device code.
 *
 * size: number of bytes to allocate.
 * Returns the redirected pointer, or 0 if `size` is negative or the host
 * allocation fails.
 */
void*
myAlloc(int size){
    void* pHost = 0;
    void* pDevice = 0;

    /* A negative size would silently wrap to a huge size_t request. */
    if (size < 0) {
        return 0;
    }

    /* Do not hand an uninitialised pointer on if the allocation failed. */
    if (cudaHostAlloc(&pHost, size,
                      cudaHostAllocMapped | cudaHostAllocWriteCombined) != cudaSuccess) {
        return 0;
    }

    getRedirectedDevicePointer(&pDevice, pHost, size); /* specify the start address and range */
    return pDevice;
}
/*
 * Sketch of the desired usage: obtain one pointer from myAlloc(), fill the
 * buffer from host code, let a kernel work on the same pointer, then read
 * the buffer back on the host and print it.
 *
 * NOTE(review): this relies on the pointer returned by myAlloc() being
 * dereferenceable from both host and device code, which is the capability
 * the post is asking about - confirm it holds on the target platform.
 */
void some_function(){
    const int count = 1024;

    /* myAlloc() takes a size in BYTES, so request room for `count` ints
       (asking for just 1024 bytes would be overrun by the loops below). */
    int* p = (int*)myAlloc((int)(count * sizeof(int)));
    if (p == 0) {
        return; /* allocation failed; nothing to do */
    }

    for (int i = 0; i < count; ++i) {
        p[ i ] = rand(); /* implicit access, which would be redirected to the original host memory */
    }

    kernel_function<<<1,1,0>>>(p); /* pass to the device so it can access its mapped memory */

    /* Wait for the kernel to finish before the host reads the buffer. */
    cudaThreadSynchronize();

    for (int i = 0; i < count; ++i) {
        printf("%d ",p[ i ]);
    }
}