Strange unspecified launch failure error on cudaMemcpy

I’m newbee in CUDA and am trying to convert my ray tracer into CUDA version, but I can’t stop getting error “unspecified launch failure” on

gpuErrchk( cudaMemcpy(buffer, dev_buffer, sizeof(CudaVec)*w*h, cudaMemcpyDeviceToHost));

. This sentence follows the kernel and I googled a bit it is said to be something related to memory operations inside the kernel. Then, I deleted most of my code and what’s left is:

The host part:

void CudaInit(CudaGeometry* geos, int _n, double* h, double* randNum, int _randN)
{
//  cudaOutput = fopen("cudaoutput.txt", "w");
  hits=h;
  printf("cuda part init %d\n", _n);
  fflush(stdout);
  if(!geos)
  {
    return;
  }

  if(!dev_geos)
  {
    gpuErrchk(cudaFree(dev_geos));
    gpuErrchk(cudaFree(dev_hits));
    gpuErrchk(cudaFree(dev_randNum))
  }

  n = _n;
  randN = _randN;

  printf("CUDA: sizeof(CudaGeometry)=%d, sizeof(CudaVec)=%d, n=%d\n", sizeof(CudaGeometry), sizeof(CudaVec), n);

  gpuErrchk( cudaMalloc((void**)&dev_geos, sizeof(CudaGeometry)*n));
  printf("cudaMalloc((void**)&dev_geos, sizeof(CudaGeometry)*n)\n");

  gpuErrchk( cudaMalloc((void**)&dev_hits, sizeof(double)*n));
  printf("cudaMalloc((void**)&dev_hits, sizeof(double)*n)\n");

  gpuErrchk( cudaMalloc((void**)&dev_randNum, sizeof(double)*randN));
  printf("cudaMalloc((void**)&dev_randNum, sizeof(double)*randN)\n");

  gpuErrchk( cudaMemcpy(dev_geos, geos, sizeof(CudaGeometry)*n, cudaMemcpyHostToDevice));
  printf("cudaMemcpy(dev_geos, geos, sizeof(CudaGeometry)*n, cudaMemcpyHostToDevice)\n");

  gpuErrchk( cudaMemcpy(dev_randNum, randNum, sizeof(double)*randN, cudaMemcpyHostToDevice));
  printf("cudaMemcpy(dev_randNum, randNum, sizeof(double)*randN, cudaMemcpyHostToDevice)\n");

  gpuErrchk( cudaPeekAtLastError() );
  fflush(stdout);
}

void CudaRender(int w, int h, CudaVec camera, CudaVec up, CudaVec forward, CudaVec right, CudaVec* buffer)
{
  CudaVec* dev_buffer;
  memset(buffer, 0, sizeof(CudaVec)*w*h);
  gpuErrchk( cudaMalloc((void**)&dev_buffer, sizeof(CudaVec)*w*h));

  printf("start cuda render\n");
  printf("buffer.size=%d %d\n", sizeof(buffer), sizeof(buffer)/sizeof(CudaVec));
  fflush(stdout);

    CudaMonteCarloRender<<<dim3(w, h), 1>>>(dev_geos, n, w, h, camera, up, forward, right, dev_buffer, dev_randNum, randN);

  fflush(stdout);
  printf("end cuda render\n");
  fflush(stdout);

  gpuErrchk( cudaMemcpy(buffer, dev_buffer, sizeof(CudaVec)*w*h, cudaMemcpyDeviceToHost));
  gpuErrchk( cudaFree(dev_buffer));
}

My kernel:

// empty function
__device__ void CudaMonteCarloSample(CudaGeometry* geolist, int n, CudaVertex o, CudaRay i, double* randNum, int randN, CudaVec& result)
{
//  result = (i.n.Vec3()+CudaVec(1.0, 1.0, 1.0))/2;
  return;

}

__global__ void CudaMonteCarloRender(CudaGeometry* geolist, int n, int w, int h, CudaVec camera, CudaVec up, CudaVec forward, CudaVec right, CudaVec* buffer, double* randNum, int randN)
{
  if(n==0) return;

  int xx=blockIdx.x;
  int yy=blockIdx.y;
  if(xx<0 || xx>=w || yy<0 || yy>=h) return;
  int index = xx+yy*gridDim.x;
  CudaRay ray = GetRay(xx, yy, w, h, camera, up, forward, right); // generate a ray from camera
  buffer[index] = CudaVec(0, 0, 0);

  for(int sp=0; sp<SampleNum; sp++)
  {
    double mind=1e20;
    CudaVertex minp(false);

    // this checks the result of GetRay
    buffer[index] = buffer[index]+(ray.n.Vec3()+CudaVec(1.0, 1.0, 1.0))/2;

    for(int gi=0; gi<n; gi++)
    {
      CudaVertex hp(false);
      CudaGeometry geo = geolist[gi];
      ray.IntersectGeo(geo, hp);
      if(hp.valid==true)
      {
        double d=(hp.p.Vec3()-camera).Length();
        if(d>1e-3 && d<mind)
        {
          mind=d;
          minp=hp;
        }
      }
    }

    if(minp.valid)
    {
      CudaVec result = buffer[index];
      CudaMonteCarloSample(geolist, n, minp, ray, randNum, randN, result);
      buffer[index] = CudaVec(0.0, 0.0, 0.0);
    }
  }

  buffer[index] = buffer[index]/SampleNum;

  return;
}

I can’t figure out what’s wrong with this code. And what’s most wired is that if I delete

buffer[index] = CudaVec(0.0, 0.0, 0.0);

on line 48 in the kernel, there’s no error.

Thank you all!

montecarlosample.cu (15.5 KB)

You could use the method described here:

https://stackoverflow.com/questions/27277365/unspecified-launch-failure-on-memcpy/27278218#27278218

to localize the error to a particular line of kernel code.

Thanks! I’ll try it

I’ve tried it and got following output:

========= CUDA-MEMCHECK
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaMemcpy. 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:C:\WINDOWS\system32\nvcuda.dll (cuGraphicsResourceGetMappedPointer + 0x2cc55c) [0x2d9c2b]
=========     Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\cudart64_80.dll (cudaMemcpy + 0x12f) [0x2acef]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x21eb]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x167a2]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x49d9]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x5ab0]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x4598a]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x457a7]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x45dbc9) [0x47b4f4]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x4a5a71) [0x4c339c]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x4a50fd) [0x4c2a28]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x2256f7) [0x24897d]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x22784e) [0x24aad4]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x226907) [0x249b8d]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x225ed1) [0x249157]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0xaf7b7) [0xd2a3d]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x225945) [0x248bcb]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x344708) [0x36798e]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x442b8) [0x6753e]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x3f460) [0x626e6]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x44f3eb) [0x46cd16]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x5beff0) [0x5dc91b]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x469dd) [0x69c63]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0xf9821) [0x11caa7]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0xf8004) [0x11b28a]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x442b8) [0x6753e]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x3eb3d) [0x61dc3]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x44f3eb) [0x46cd16]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x5beff0) [0x5dc91b]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Guid.dll (QOpenGLFunctions_4_2_Core::glProgramUniform1d + 0xa0282) [0xd3f57]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Guid.dll (QOpenGLFunctions_4_2_Core::glProgramUniform1d + 0xa2e1e) [0xd6af3]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Guid.dll (QOpenGLFunctions_4_2_Core::glProgramUniform1d + 0x60900) [0x945d5]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\platforms\qwindowsd.dll (qt_plugin_query_metadata + 0xb35d7) [0xb9cb2]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x508e26) [0x526751]
=========     Host Frame:C:\WINDOWS\System32\USER32.dll (CallWindowProcW + 0x4dd) [0xb85d]
=========     Host Frame:C:\WINDOWS\System32\USER32.dll (DispatchMessageW + 0x1af) [0xb1ef]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x509830) [0x52715b]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\platforms\qwindowsd.dll (qt_plugin_query_metadata + 0xb3589) [0xb9c64]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x44a08d) [0x4679b8]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x44a2d3) [0x467bfe]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Cored.dll (QBitArray::size + 0x44d1a4) [0x46aacf]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Guid.dll (QOpenGLFunctions_4_2_Core::glProgramUniform1d + 0x9d863) [0xd1538]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\Qt5Widgetsd.dll (QGestureEvent::~QGestureEvent + 0x3e644) [0x618ca]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x3268]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x4a7bd]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x4821d]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x48157]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x4801e]
=========     Host Frame:D:\0 Homeworks & Assignments\CPURenderer\build-CPURender-Desktop_Qt_5_8_0_MSVC2015_64bit-Debug\debug\CPURender.exe [0x48239]
=========     Host Frame:C:\WINDOWS\System32\KERNEL32.DLL (BaseThreadInitThunk + 0x14) [0x11fe4]
=========     Host Frame:C:\WINDOWS\SYSTEM32\ntdll.dll (RtlUserThreadStart + 0x21) [0x6efc1]
=========
========= ERROR SUMMARY: 1 error

This doesn’t give specific reason of the error.

you might be hitting a windows WDDM TDR timeout

Oh yes after I turned that in Nsight everything works now. Thank you!!!