@Sarnath,
This time I went by your advice and found that the way you allocate and de-allocate 2D device pointer using hostarray variable works , but i tried to copy and then retrieve data from host to device array but failed. Here is what I did:
float *d_Ptr;
//copy data
float *a = (float*)malloc(15*sizeof(float));
for(int i = 0; i < 15; i++)
a[i] = 0.1f*i;
float ** temp = NULL, **c = NULL;
temp = (float**)malloc(5 * sizeof(float*));
c = (float**)malloc(5 * sizeof(float*));
for(int i = 0; i < 5; i++)
{
temp[i] = (float*)malloc(3 * sizeof(float));
c[i] = (float*)malloc(3 * sizeof(float));
memset(c[i] , 0 , 3 * sizeof(float));
//memcpy(temp[i] , a + i*3 , 3*sizeof(float));
}
float *b = (float*)malloc(15*sizeof(float));
memset(b , 0 , 15*sizeof(float));
cudaError_t err = cudaSuccess;
err = cudaMalloc( (void**)&d_Ptr , sizeof(float*) * 5 );
if(err != cudaSuccess)
{
printf("Failure in allocating Col array\n");
exit(1);
}
else
printf("Successful\n");
for(int i = 0; i < 5; i++)
{
err = cudaSuccess;
err = cudaMalloc( (void**)&temp[i] , 3 * sizeof(float) );
//err = cudaMalloc( (void**)&d_Ptr[i] , 3 * sizeof(float) );
if(err != cudaSuccess)
{
printf("Failure in allocating Row array at %d\n", i);
exit(1);
}
else
printf("Successful allocation at %d\n",i);
}
err = cudaMemcpy(d_Ptr, temp, 15*sizeof(float*), cudaMemcpyHostToDevice);
//printf("copy data to allocated device var up\n\n");
//err = cudaSuccess;
//err = cudaMemcpy2D(d_Ptr , 3 * sizeof(float) , a , 3 * sizeof(float) , 3 * sizeof(float) , 5, cudaMemcpyHostToDevice);
if(err != cudaSuccess)
{
printf("Failure in copying data\n");
exit(1);
}
else
printf("Successful copying at \n");
for(int i = 0; i < 5; i++)
memset(temp[i], 0 , 3*sizeof(float) );
printf("\nretrieve data from device var up\n\n");
err = cudaMemcpy2D(temp, 3 * sizeof(float), d_Ptr , 3 * sizeof(float) , 3 * sizeof(float) , 5, cudaMemcpyDeviceToHost);
err = cudaMemcpy2D(c, 3 * sizeof(float), d_Ptr , 3 * sizeof(float) , 3 * sizeof(float) , 5, cudaMemcpyDeviceToHost);
//err = cudaMemcpy2D(b, 3 * sizeof(float), d_Ptr , 3 * sizeof(float) , 3 * sizeof(float) , 5, cudaMemcpyDeviceToHost);
if(err != cudaSuccess)
{
printf("Failure in getting data\n");
exit(1);
}
else
{
//for(int i = 0; i < 15; i++)
// printf("b[%d]=%f\n",i,b[i]);
for(int i = 0; i < 5; i++){
for(int j = 0; j < 3; j++){
printf("temp[%d][%d]=%f\n",i,j , temp[i][j]);
}
}
printf("\n\n");
for(int i = 0; i < 5; i++){
for(int j = 0; j < 3; j++){
printf("c[%d][%d]=%f\n",i,j , c[i][j]);
}
}
printf("Successful getting at \n");
}
printf("Successful End in allocating & getting data.. now freeing up\n\n");
//Free
err = cudaSuccess;
err = cudaMemcpy(temp, d_Ptr , 5 * sizeof(float*), cudaMemcpyDeviceToHost);
for(int i = 0; i < 5; i++)
{
err = cudaSuccess;
/*
err = cudaFree((void*)&d_Ptr[i]);
*/
err = cudaFree(temp[i]);
if(err != cudaSuccess)
{
printf("Failure in freeing Row at %d\n", i);
exit(1);
}
else
printf("Successful free of row %d\n",i);
}
err = cudaSuccess;
err = cudaFree(d_Ptr);
if(err != cudaSuccess)
{
printf("Failure in freeing\n");
exit(1);
}
else
printf("Successful Freeing\n");
printf("ALL DONE!!\n");
Now if I comment stmt cudaMemcpy2D then code works but result is 0.This is the same zero that I talked in one of my earlier posts.
However, if while allocating 2D deviece array I do not use hostarray that is inside loop if i do
cudaMalloc((void**)&d_Temp , s3*sizeof(floaty) );
and then I copy data from host to device using command cudaMemcpy2D, then on retreival I get same data but then it gives seg fault while de-allocating 2D device pointer.
I have tried a lot but not successful in using your tech of allocation and freeing up plus getting exactly same data as put in. Kindly help.