Hi,
I’m trying to transfer data in a DeviceToDevice copy from an array allocated using cudaMalloc to a cudaArray using cudaMemcpy3D. However the data transfer is not working.
I need to make this data transfer in order to complete a cycle and make a process iterative. I don’t know why it is not working; when I call cudaGetLastError I get: invalid argument.
I don’t know why; maybe I am declaring the cudaPitchedPtr the wrong way. I will post my cleaned-up code so that anyone can see it. Can anybody see an error in the way I am using cudaMemcpy3D the second time?
Best regards, and many thanks in advance.
C.
// Excerpt of the host code. ni, nj, nk, totpoints, i, grid, block and
// operation_kernel are declared outside this excerpt.

// Host arrays: f1 holds the initial values, f2 receives the final result.
float *f1, *f2;
// Linear device arrays (f1_data is the buffer the kernel is launched with).
float *f1_data, *f2_data;
// cudaArray on the device that backs the texture.
cudaArray *f1_array;
// 3D float texture reference, bound to f1_array below.
texture <float, 3, cudaReadModeElementType> f1_tex;

// Memory allocation.
// Every buffer holds ni*nj*nk floats, so the size in BYTES must include
// sizeof(float). Passing only the element count to cudaMalloc would leave
// f1_data four times too small for both the kernel output and the
// device-to-device cudaMemcpy3D further down.
const size_t volume_bytes = (size_t)ni * nj * nk * sizeof(float);
f1 = (float *)malloc(volume_bytes);
f2 = (float *)malloc(volume_bytes);
cudaMalloc((void **)&f1_data, volume_bytes);

// Texture binding: allocate an ni x nj x nk float array and bind f1_tex to it.
cudaChannelFormatDesc desc;
cudaExtent extent;
desc = cudaCreateChannelDesc<float>();
// For copies that involve a cudaArray the extent is given in elements.
extent.width = ni;
extent.height = nj;
extent.depth = nk;
cudaMalloc3DArray( &f1_array, &desc, extent);
cudaBindTextureToArray(f1_tex, f1_array, desc);
f1_tex.filterMode = cudaFilterModePoint;

// Array initialization.
for (i=0; i<totpoints; i++) {
    f1[i] = i;
    f2[i] = -1.f;
}

// Data transfer: f1 (host) ---> f1_array (device).
cudaMemcpy3DParms p = { 0 };
p.extent = extent;
p.kind = cudaMemcpyHostToDevice;
p.dstArray = f1_array;
// Tightly packed source: row pitch in bytes, then width and height in elements.
p.srcPtr = make_cudaPitchedPtr( (void*)f1, ni*sizeof(float), ni, nj );
cudaMemcpy3D(&p);

// Kernel call, uses a texture fetch to modify f1_data by adding 1.f:
operation_kernel<<<grid, block>>>(ni, nj, f1_data);
cudaUnbindTexture(f1_tex);

// Data transfer: f1_data (device, linear) ---> f1_array (device), so that the
// next kernel call reads the updated values through the texture.
// f1_data comes from plain cudaMalloc, so its rows are tightly packed and the
// pitch is exactly ni*sizeof(float).
cudaMemcpy3DParms p1 = { 0 };
p1.extent = extent;
p1.kind = cudaMemcpyDeviceToDevice;
p1.srcPtr = make_cudaPitchedPtr( (void*)f1_data, ni*sizeof(float), ni, nj );
p1.dstArray = f1_array;
cudaMemcpy3D(&p1);
cudaBindTextureToArray(f1_tex, f1_array, desc);
f1_tex.filterMode = cudaFilterModePoint;

// Kernel call to add 1.f again (same kernel as the first launch):
operation_kernel<<<grid, block>>>(ni, nj, f1_data);

// Return data to the CPU.
cudaMemcpy(f2, f1_data, volume_bytes, cudaMemcpyDeviceToHost);

// Cleanup.
if(f1_data)
    cudaFree(f1_data);
if(f1)
    free(f1);
if(f2)
    free(f2);
cudaUnbindTexture(f1_tex);
if(f1_array)
    cudaFreeArray(f1_array);
return 0;
}
So after the program ends and I look at the results, I should see f1’s values increased by 2; however, this is not the case: they are only increased once.
I’m baffled…