cudaMempcy2d problem

Hi everyone!:)

I have a problem with cudaMemcpy2D, what I mean is, the status (cudaError) of copy is 11 = cudaErrorInvalidValue. And I don’t know where I made a mistake.

I’m asking for a help:)

64bit Win7, cuda 3.2, GF GTS 250

code:

cudaError_t st, st1, st2;

        size_t pitch;

	float **image_cpu, **temp;

	float *image_gpu, *gpu_result;

	int w=230;

	int h=320;

	

        dim3 threadsPerBlock(16, 16); 

	dim3 numBlocks(w / threadsPerBlock.x, h / threadsPerBlock.y); 

	

        //2d array allocated on gpu

	st = cudaMallocPitch((void**)&image_gpu, &pitch, w*sizeof(float), h);

	cout<<st<<endl;

	st = cudaMallocPitch((void**)&gpu_result, &pitch, w*sizeof(float), h);

	cout<<st<<endl;

//2d array allocated on cpu

	try{

		image_cpu = new float *[w];

		temp = new float*[w];}

	catch(bad_alloc){

		cout<<"bad 1\n";}

	try{

	for(int i=0;i<w;i++){

		image_cpu[i] = new float[h];

		temp[i] = new float[h];}

	}

	catch(bad_alloc){cout<<"bad 2\n";}

	// cuPrintf

	cudaPrintfInit();

	kernel<<<numBlocks, threadsPerBlock>>>(image_gpu, w, h);

	cudaThreadSynchronize();

	cudaPrintfDisplay(stdout, true);

	cudaPrintfEnd();

	

	cudaPrintfInit();

 	kernel<<<numBlocks, threadsPerBlock>>>(gpu_result, w, h);

	cudaPrintfDisplay(stdout, true);

	cudaPrintfEnd();

	cudaThreadSynchronize();

	////////////////////////////////////////////////

        // here is a code for fill up the right tables//

        ////////////////////////////////////////////////

	st1 = cudaMemcpy2D(image_gpu,  w*h*sizeof(float), image_cpu,  pitch, w*h*sizeof(float), h, cudaMemcpyHostToDevice);

	cout<<st1<<endl; // there I got cudaError = 11