array from device printing all zeroes

Hello!

I am new to programming on cuda and i’ll deeply appreciated it if someone can help me look at my code and why it is printing all zeroes when I copied it back to host. Below is my code:

#include
#include
#include
#include
#include
using namespace std;

// Device code
global void CopyData(float* d_array,
float* d_destinationArray,
size_t pitch,
int columnCount,
int rowCount)
{
for (int row = 0; row < rowCount; row++)
{
float* rowData = (float*)(((char*)d_array) + (row * pitch));

		for (int column = 0; column < columnCount; column++)
		{
			rowData[column] = 11.0;
			d_destinationArray[(row*columnCount) + column] = rowData[column];
		}
	}

}

int main(int argc, char** argv)
{

int columnCount = 4;
int rowCount = 19998;

// Host code dealing with memory and the device
float *d_array; // the device array which memory will be allocated to
float *d_destinationArray; // the device array

// allocate memory on the host
float* h_array = new float[columnCount*rowCount];

// the pitch value assigned by cudaMallocPitch which ensures correct data structure alignment
size_t pitch;

// allocated the device memory for source array
cudaMallocPitch(&d_array, &pitch, columnCount * sizeof(float), rowCount);

// allocate the device memory for destination array
cudaMalloc(&d_destinationArray, columnCountrowCountsizeof(float));

// call the kernel which copies values from d_array to d_destinationArray
CopyData<<<100,512>>>(d_array, d_destinationArray, pitch, columnCount, rowCount);

// copy the data back to the host memory
cudaMemcpy(h_array,
d_destinationArray,
columnCountrowCountsizeof(float),
cudaMemcpyDeviceToHost);

// print out the test values, all the 111111.0
for ( i = 0 ; i < rowCount ; i++)
{
for ( j = 0 ; j < columnCount ; j++)
{
cout << “h_array[” << (icolumnCount) + j << “]=” << h_array[(icolumnCount) + j] << endl;

}

}

return 0;
}

i am getting this as results:
h_array[79977]=0
h_array[79978]=0
h_array[79979]=0
h_array[79980]=0
h_array[79981]=0
h_array[79982]=0
h_array[79983]=0
h_array[79984]=0
h_array[79985]=0
h_array[79986]=0
h_array[79987]=0
h_array[79988]=0
h_array[79989]=0
h_array[79990]=0
h_array[79991]=0

I am not sure why as I thought I am giving each element in my array the value of 11.0 is there a problem when I copied from device back to host or is my program just not running on the GPU?

Thank you for any suggestion!!

Seems like this was sorted out for you on your cross posting here:

[url]http://stackoverflow.com/questions/36827161/cuda-array-elements-when-copied-from-device-back-to-host-gives-all-zeroes[/url]