Hi all,
I am running into a segmentation fault when calling cudaMemcpy in the following code:
[codebox]
int* d_startArray;
CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startArray), sizeof(int)*rLen) );
//…
int lastValue;
printf("debug: d_startArray=%p rlen=%d", d_startArray, rLen);
CUDA_SAFE_CALL( cudaMemcpy( &lastValue, d_startArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
printf("reach here? no!");
[/codebox]
The printf before the call shows that d_startArray and rLen have reasonable values. However, the process never reaches the printf after it, so checking the return value of cudaMemcpy is of no use: the call crashes before it returns.
I thought the problem might be the expression (d_startArray+(rLen-1)), since this is arithmetic on a device pointer. So I changed the program to:
[codebox]
//…
int *h_startArray;
printf("debug: d_startArray=%p rlen=%d", d_startArray, rLen);
cudaMallocHost((void **)&startArray, sizeof(int)*rLen);
printf("reach here? yes.");
cudaError_t ret=cudaMemcpy( startArray, d_startArray, sizeof(int)*rLen, cudaMemcpyDeviceToHost);
printf("reach here? No.");
lastValue=startArray[rLen-1];
[/codebox]
But the segmentation fault still occurs in the cudaMemcpy() call. I have no idea why this happens. I read in the CUDA manual that dereferencing a device pointer on the host side may lead to a segmentation fault, but I have tried to avoid that here.
Does anybody have any idea about this? Thanks very much!
Feng