Problems using/copying 2D arrays cudaMallocPitch, cudaMemcpy2D

I have some problems using 2D arrays in CUDA i’m currently reading some resources from file into a 2D array (dimensions of my array [32][1000] doubles). The problem i have is that i can’t get it to work the results after my kernel execution are just random numbers and not the results as it should be. What am i doing wrong using 2D arrays, hope that someone can help me with this?

My starting code before the kernel execution is as followed:




double *devPtr_Reg_RF_Mean_Array;

ComplexNumber *devPtr_Reg_RF_RE_HILBERT;

size_t pitch_Reg_RF_Mean_Array, pitch_Reg_RF_RE_HILBERT;

cudaMallocPitch((void**)&devPtr_Reg_RF_Mean_Array, &pitch_Reg_RF_Mean_Array,NUMBER_OF_SAMPLES * sizeof(double), NUMBER_OF_LINES);

cudaMemcpy2D(devPtr_Reg_RF_Mean_Array, pitch_Reg_RF_Mean_Array, Reg_RF_Mean_Array,

pitch_Reg_RF_Mean_Array, NUMBER_OF_SAMPLES * sizeof(double), NUMBER_OF_LINES, cudaMemcpyHostToDevice);

cudaMallocPitch((void**)&devPtr_Reg_RF_RE_HILBERT, &pitch_Reg_RF_RE_HILBERT,

NUMBER_OF_SAMPLES * sizeof(ComplexNumber), NUMBER_OF_LINES);

hilbert<<<1, 32>>>(devPtr_Reg_RF_Mean_Array, pitch_Reg_RF_Mean_Array, devPtr_Reg_RF_RE_HILBERT, pitch_Reg_RF_RE_HILBERT);


In this case NUMBER_OF_SAMPLES = 1000 and NUMBER_OF_LINES = 32.

The code of my kernel is the following:


global void hilbert(double *devPtr_Reg_RF_Mean_Array, size_t pitch_Reg_RF_Mean_Array,

ComplexNumber *devPtr_Reg_RF_RE_HILBERT, size_t pitch_Reg_RF_RE_HILBERT) {

double * row_Reg_RF_Mean_Array;

ComplexNumber * row_Reg_RF_RE_HILBERT;

row_Reg_RF_Mean_Array = (double*)((char*)devPtr_Reg_RF_Mean_Array + threadIdx.x * pitch_Reg_RF_Mean_Array);

row_Reg_RF_RE_HILBERT = (ComplexNumber*)((char*)devPtr_Reg_RF_RE_HILBERT + threadIdx.x * pitch_Reg_RF_RE_HILBERT);

double s0;


	s0 = 0.0;

	for(int j = 1; j < HILBERT_WINDOW; j+=2) {

		s0 += row_Reg_RF_Mean_Array[i+j] - row_Reg_RF_Mean_Array[i-j] / j;


	row_Reg_RF_RE_HILBERT[i].real = row_Reg_RF_Mean_Array[i];

	row_Reg_RF_RE_HILBERT[i].imag = 2 * s0 / PI;



After the execution of this code a should recieve a 2D matrix containing my real and imaginary results from the Hilbert transformation. I’m copying it back to the host like this: (ComplexNumber is a struct of two doubles one is real and the other one is imaginary)

[codebox]cudaMemcpy2D(Reg_RF_RE_HILBERT, pitch_Reg_RF_RE_HILBERT, devPtr_Reg_RF_RE_HILBERT, pitch_Reg_RF_RE_HILBERT,

NUMBER_OF_SAMPLES * sizeof(ComplexNumber), NUMBER_OF_LINES, cudaMemcpyDeviceToHost);[/codebox]