unspecified launch failure on memory write

I’m getting an “unspecified launch failure” error when executing this code:

#include <assert.h>

#include <stdint.h>

#include <stdio.h>

// Device-resident pointer to a single array of 1<<21 (2,097,152) 64-bit words.
// Because the variable itself is __device__, host code has to set it with
// cudaMemcpyToSymbol; taking its address on the host does not reach the copy
// that kernels read.
__device__ uint64_t (*playground)[1<<21];

// Writes one fixed 64-bit pattern per thread into the array that the
// __device__ pointer `playground` refers to.
//
// Launch contract: 1-D grid with 512 threads per block (the block number is
// shifted left by 9 bits and OR-ed with the thread number). 4096 such blocks
// cover all 1<<21 elements exactly once. `playground` must already hold a
// valid device address when the kernel runs.
__global__ void
crash()
{
  // Element count taken from the pointer's own type so it cannot drift from
  // the declaration.
  const unsigned capacity = sizeof(*playground) / sizeof((*playground)[0]);

  const unsigned index = (blockIdx.x<<9)|threadIdx.x;

  // Guard the tail: a grid larger than 4096 blocks must not write past the
  // end of the array.
  if (index < capacity)
    (*playground)[index] = 0x8000010000040000ULL;
}

// Reproducer from the question: allocates device memory, launches crash() on
// 4096 blocks of 512 threads, waits for it, and prints the CUDA status string.
int

main()

{

  // NOTE(review): `playground` is a __device__ variable, but its address is
  // taken here in host code, so the address returned by cudaMalloc is stored
  // on the host side only and the pointer the kernel dereferences is never
  // set. This is the cause identified in the reply below.
  // NOTE(review): `*` binds tighter than `<<`, so the size is
  // (65 * sizeof(uint64_t)) << 21 bytes, i.e. 65 arrays' worth, while the type
  // of `playground` describes a single array of 1<<21 elements.
  cudaError_t err = cudaMalloc(&playground, 65 * sizeof(uint64_t) * 1<<21);

  // assert() is compiled out when NDEBUG is defined, leaving a failed
  // allocation unchecked in such builds.
  assert(err == cudaSuccess);

  crash<<<4096,512>>>();

  // Block until the kernel has finished so that an execution fault is
  // reported by the error query that follows.
  cudaThreadSynchronize();

  err = cudaGetLastError();

  puts(cudaGetErrorString(err));

  // The exit status is 0 regardless of the value of err.
  return 0;

}

Any idea why?

I’m on Fedora Core 12 (64-bit), with 2 GB of memory on the graphics card and 4 GB on the host.

I ran this test at both runlevel 3 and runlevel 5 and got the same result.

Thx.

I.

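playground is a __device__ variable, so it lives in device memory, but cudaMalloc(&playground, ...) takes its address in host code. The address of the new allocation is therefore written on the host side only; the device-side pointer that the kernel dereferences is never set, and the write through it faults.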

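You need to allocate through an ordinary host-side pointer and then copy that pointer's value into the device symbol explicitly with cudaMemcpyToSymbol: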

// Corrected driver: allocates the array on the device, publishes its address
// into the __device__ pointer `playground`, runs crash() and reports the
// outcome.
//
// Prints the final CUDA status string to stdout ("no error" on success).
// Returns 0 on success and 1 if any CUDA call or the kernel itself failed.
int
main()
{
  // cudaMalloc must write into a host variable; a __device__ symbol's address
  // is not usable for that from host code.
  void *temp = NULL;

  cudaError_t err = cudaMalloc(&temp, sizeof(*playground));
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  // Copy the pointer value itself (sizeof(playground) bytes, not the array)
  // into the device-side symbol so the kernel sees a valid address.
  err = cudaMemcpyToSymbol(playground, &temp, sizeof(playground), 0, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(err));
    cudaFree(temp);
    return 1;
  }

  // 4096 blocks x 512 threads = 1<<21 threads, one per array element.
  crash<<<4096,512>>>();

  // A launch does not return a status: query launch/configuration errors
  // first, then synchronize to pick up faults raised while the kernel ran.
  err = cudaGetLastError();
  if (err == cudaSuccess)
    err = cudaDeviceSynchronize();

  puts(cudaGetErrorString(err));

  // Best-effort release; after a kernel fault this call may itself fail, so
  // its result is deliberately not inspected.
  (void)cudaFree(temp);

  return err == cudaSuccess ? 0 : 1;
}