Hi everyone!:)
I have a problem with cudaMemcpy2D, what I mean is, the status (cudaError) of copy is 11 = cudaErrorInvalidValue. And I don’t know where I made a mistake.
I’m asking for a help:)
64bit Win7, cuda 3.2, GF GTS 250
code:
cudaError_t st, st1, st2;
size_t pitch;
float **image_cpu, **temp;
float *image_gpu, *gpu_result;
int w=230;
int h=320;
dim3 threadsPerBlock(16, 16);
dim3 numBlocks(w / threadsPerBlock.x, h / threadsPerBlock.y);
//2d array allocated on gpu
st = cudaMallocPitch((void**)&image_gpu, &pitch, w*sizeof(float), h);
cout<<st<<endl;
st = cudaMallocPitch((void**)&gpu_result, &pitch, w*sizeof(float), h);
cout<<st<<endl;
//2d array allocated on cpu
try{
image_cpu = new float *[w];
temp = new float*[w];}
catch(bad_alloc){
cout<<"bad 1\n";}
try{
for(int i=0;i<w;i++){
image_cpu[i] = new float[h];
temp[i] = new float[h];}
}
catch(bad_alloc){cout<<"bad 2\n";}
// cuPrintf
cudaPrintfInit();
kernel<<<numBlocks, threadsPerBlock>>>(image_gpu, w, h);
cudaThreadSynchronize();
cudaPrintfDisplay(stdout, true);
cudaPrintfEnd();
cudaPrintfInit();
kernel<<<numBlocks, threadsPerBlock>>>(gpu_result, w, h);
cudaPrintfDisplay(stdout, true);
cudaPrintfEnd();
cudaThreadSynchronize();
////////////////////////////////////////////////
// here is a code for fill up the right tables//
////////////////////////////////////////////////
st1 = cudaMemcpy2D(image_gpu, w*h*sizeof(float), image_cpu, pitch, w*h*sizeof(float), h, cudaMemcpyHostToDevice);
cout<<st1<<endl; // there I got cudaError = 11