Hello,
I’m trying to get
// Create arrays on the device.
cudaArray *d_volume = 0;
volumeSize = make_cudaExtent(dimX, dimY, dimZ);
// Create a 3D CUDA array with one uchar channel per element.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
CUDA_SAFE_CALL(cudaMalloc3DArray(&d_volume, &channelDesc, volumeSize));
// Linear (non-pitched) device buffer that is used as the copy source below.
// NOTE(review): the buffer is sized with width/height/depth while the extent
// uses dimX/dimY/dimZ - presumably these are equal; confirm against the caller.
CUDA_SAFE_CALL(cudaMalloc((void**)&tmpVolume, width*height*depth*sizeof(uchar)));
CUDA_SAFE_CALL(cudaMemset(tmpVolume, 0, width*height*depth*sizeof(uchar)));
// Try to copy the linear buffer into the 3D array.
cudaMemcpy3DParms copyParams = {0};
// The pitch handed to make_cudaPitchedPtr is simply width*sizeof(uchar); it does
// not come from cudaMallocPitch, because tmpVolume was allocated with cudaMalloc.
copyParams.srcPtr = make_cudaPitchedPtr((void*)tmpVolume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
copyParams.dstArray = d_volume;
copyParams.extent = volumeSize;
copyParams.kind = cudaMemcpyDeviceToDevice;
// This is the call that reports cudaErrorInvalidValue.
cudaError err = cudaMemcpy3D(&copyParams);
to work, but whatever I do, I always get the error message cudaErrorInvalidValue.
Does anybody have an idea what the problem is here?
Thanks!
Raphael
XeneX
September 23, 2008, 6:12pm
2
Hi,
I have exactly the same problem… does anyone know what the problem is here? Using float instead of uchar works but I really need uchar for performance reasons.
Best regards,
Marco
paulius
September 23, 2008, 8:17pm
3
Most likely the problem is caused by the way your data is stored in gmem (global memory) before it is copied into the 3D array. Since a copy into a 3D array requires a pitched pointer, try allocating with cudaMallocPitch() rather than with cudaMalloc(). Then use the pitch that cudaMallocPitch() returns as the pitch argument when creating the pitched pointer.
Here’s the code that worked for me (I too was getting the error when I used cudaMalloc). Let me know if this fixes your problem.
Paulius
#include <stdio.h>
#define TYPE short
// Fills a densely packed dimx * dimy * dimz buffer (x varies fastest, then y,
// then z) so that each element encodes its own position:
//   value = 1000*(z+1) + 100*(y+1) + x
// The result is stored through a TYPE pointer, so it is converted to TYPE on
// assignment. `a` must have room for dimx*dimy*dimz elements.
void init_data(TYPE *a, const int dimx, const int dimy, const int dimz)
{
    TYPE *cursor = a;
    for (int z = 0; z < dimz; ++z)
    {
        const int plane_tag = 1000 * (z + 1);
        for (int y = 0; y < dimy; ++y)
        {
            const int row_tag = plane_tag + 100 * (y + 1);
            for (int x = 0; x < dimx; ++x)
            {
                *cursor++ = row_tag + x;
            }
        }
    }
}
int main()
{
cudaSetDevice(0);
int dimx, dimy, dimz;
dimx = 16;
dimy = 2;
dimz = 2;
int num_image_bytes = dimx*dimy*dimz*sizeof(float);
printf("image:\t%7.2f MB\n", num_image_bytes/(1024.f*1024.f));
printf("\n");
///////////////////////////////////////////////////
// allocate and initialize memory
TYPE *h_image=0, *d_image=0;
h_image = (TYPE*)malloc(num_image_bytes);
size_t pitch=0;
cudaMallocPitch( (void**)&d_image, &pitch, dimx*sizeof(TYPE), dimy*dimz );
printf("pitch = %u\n", pitch);
if( 0==h_image || 0==d_image )
{
printf("couldn't allocate memory\n");
return 1;
}
cudaMemset(d_image, 0, num_image_bytes);
////////////////////////////////////////////////////
// prepare texture
cudaChannelFormatDesc ca_descriptor;
cudaExtent ca_extent;
cudaArray *ca_image=0;
ca_descriptor = cudaCreateChannelDesc<TYPE>();
ca_extent.width = dimx;
ca_extent.height = dimy;
ca_extent.depth = dimz;
cudaMalloc3DArray( &ca_image, &ca_descriptor, ca_extent );
init_data( h_image, dimx,dimy,dimz );
cudaMemcpy( d_image, h_image, num_image_bytes, cudaMemcpyHostToDevice );
cudaMemcpy3DParms cpy_params = {0};
cpy_params.extent = ca_extent;
cpy_params.kind = cudaMemcpyDeviceToDevice;
cpy_params.dstArray = ca_image;
cpy_params.srcPtr = make_cudaPitchedPtr( (TYPE*)d_image, pitch, dimx, dimy);
cudaMemcpy3D( &cpy_params );
printf("d2d memcopy: %s\n", cudaGetErrorString( cudaGetLastError() ) );
if(d_image)
cudaFree(d_image);
if(h_image)
free(h_image);
if(ca_image)
cudaFreeArray(ca_image);
return 0;
}
XeneX
September 26, 2008, 7:10am
4
Hello Paulius,
thank you very much, that was the problem. In my opinion, this is not entirely clear in the programming guide. Maybe there could be an additional note in future releases. But for now, everything is back on the road, thanks again!
Best regards,
Marco
Thanks a lot!!! It solved my problem, too!
:D :D :D
btw: what is guided mode?
Most likely the problem is caused by the way your data is stored in gmem (global memory) before it is copied into the 3D array. Since a copy into a 3D array requires a pitched pointer, try allocating with cudaMallocPitch() rather than with cudaMalloc(). Then use the pitch that cudaMallocPitch() returns as the pitch argument when creating the pitched pointer.
Here’s the code that worked for me (I too was getting the error when I used cudaMalloc). Let me know if this fixes your problem.
Paulius
#include <stdio.h>
#define TYPE short
// Writes a position-dependent test pattern into a tightly packed buffer of
// dimx * dimy * dimz elements, x being the fastest-varying index:
//   element(x, y, z) = 1000*(z+1) + 100*(y+1) + x
// Values are converted to TYPE when stored. The caller must provide space for
// dimx*dimy*dimz elements in `a`.
void init_data(TYPE *a, const int dimx, const int dimy, const int dimz)
{
  size_t offset = 0;
  for (int slice = 0; slice < dimz; ++slice)
  {
    for (int row = 0; row < dimy; ++row)
    {
      const int row_base = 1000*(slice+1) + 100*(row+1);
      for (int col = 0; col < dimx; ++col, ++offset)
        a[offset] = row_base + col;
    }
  }
}
int main()
{
  cudaSetDevice(0);
  int dimx, dimy, dimz;
  dimx = 16;
  dimy = 2;
  dimz = 2;
  int num_image_bytes = dimx*dimy*dimz*sizeof(float);
  printf("image:\t%7.2f MB\n", num_image_bytes/(1024.f*1024.f));
  printf("\n");
  ///////////////////////////////////////////////////
  // allocate and initialize memory
  TYPE *h_image=0, *d_image=0;
  h_image = (TYPE*)malloc(num_image_bytes);
  size_t pitch=0;
  cudaMallocPitch( (void**)&d_image, &pitch, dimx*sizeof(TYPE), dimy*dimz );
  printf("pitch = %u\n", pitch);
  if( 0==h_image || 0==d_image )
  {
    printf("couldn't allocate memory\n");
 return 1;
  }
  cudaMemset(d_image, 0, num_image_bytes);
  ////////////////////////////////////////////////////
  // prepare texture
  cudaChannelFormatDesc ca_descriptor;
  cudaExtent ca_extent;
  cudaArray *ca_image=0;
  ca_descriptor = cudaCreateChannelDesc<TYPE>();
  ca_extent.width  = dimx;
  ca_extent.height = dimy;
  ca_extent.depth  = dimz;
  cudaMalloc3DArray( &ca_image, &ca_descriptor, ca_extent );
Â
  init_data( h_image, dimx,dimy,dimz );
  cudaMemcpy( d_image, h_image, num_image_bytes, cudaMemcpyHostToDevice );
  cudaMemcpy3DParms cpy_params = {0};
  cpy_params.extent  = ca_extent;
  cpy_params.kind   = cudaMemcpyDeviceToDevice;
  cpy_params.dstArray = ca_image;
  cpy_params.srcPtr  = make_cudaPitchedPtr( (TYPE*)d_image, pitch, dimx, dimy);
  cudaMemcpy3D( &cpy_params );
  printf("d2d memcopy: %s\n", cudaGetErrorString( cudaGetLastError() ) );
  if(d_image)
    cudaFree(d_image);
  if(h_image)
    free(h_image);
  if(ca_image)
    cudaFreeArray(ca_image);
  return 0;
}
[snapback]443417[/snapback]