Invalid device pointer bug? Struct with constructor causes error?

If I have

// Wrapper around one int allocated with cudaMalloc: the constructor allocates
// and zeroes it, the destructor releases it with cudaFree.
// NOTE(review): no copy constructor or copy assignment is declared, so a copy
// of this struct holds the same `counter` pointer and its destructor calls
// cudaFree on that pointer as well. Passing the struct by value therefore
// looks like it would free the same pointer twice -- confirm whether that is
// the source of the reported "invalid device pointer" error.
struct GPUAtomicCounter {

    // Pointer written by cudaMalloc in the constructor.
    int *counter;

// Allocates sizeof(int) bytes with cudaMalloc and clears them with cudaMemset;
// both calls go through CHECK_CUDA (defined outside this snippet).
GPUAtomicCounter( void ) {

        CHECK_CUDA( cudaMalloc( (void**)&counter,

                              sizeof(int) ) );

        CHECK_CUDA( cudaMemset( counter, 0, sizeof(int) ) );

    }

// Frees `counter`; the return value of cudaFree is not checked here.
~GPUAtomicCounter( void ) {

        cudaFree( counter );

    }

};

// Test kernel: takes a GPUAtomicCounter by value and only copies threadIdx.x
// into a local; neither `c` nor `tx` is used afterwards.
// NOTE(review): PREC does not appear in the parameter list, so it presumably
// has to be supplied explicitly at the launch site -- confirm.
template<typename PREC>

   __global__ void branchTest_kernel(GPUAtomicCounter c){

      int tx = threadIdx.x;

   }

// Host wrapper: constructs a GPUAtomicCounter and launches branchTest_kernel
// with 1 block of 33 threads. The parameter `a` is not used in this body.
// NOTE(review): no cudaGetLastError / synchronize call follows the launch in
// this snippet, so any error would only surface in a later CUDA call.
template<typename PREC>

   __host__ void  test_kernelWrap(PREC * a){

      dim3 threads(33);

      dim3 blocks(1);

// Default-constructed here; its destructor runs when this function returns.
GPUAtomicCounter c;

       // Manual allocation of c.counter, currently disabled.
       //CHECK_CUDA( cudaMalloc( (void**)&c.counter, sizeof(int) ) );

       //CHECK_CUDA( cudaMemset( c.counter, 0, sizeof(int) ) );

// `c` is passed by value, i.e. the kernel argument is a copy of `c`.
testsKernels::branchTest_kernel<<<blocks,threads>>>(c);

   }

I get an invalid device pointer error after the CUDA kernel launch. The kernel finishes correctly, but I don't know what's wrong.

If I comment out the destructor and constructor and initialize the GPUAtomicCounter outside (below the definition of c), then the error disappears…?

Does anybody know what the problem is here?

Thanks!

Gabriel!

If I have

// Wrapper around one int allocated with cudaMalloc: the constructor allocates
// and zeroes it, the destructor releases it with cudaFree.
// NOTE(review): no copy constructor or copy assignment is declared, so a copy
// of this struct holds the same `counter` pointer and its destructor calls
// cudaFree on that pointer as well. Passing the struct by value therefore
// looks like it would free the same pointer twice -- confirm whether that is
// the source of the reported "invalid device pointer" error.
struct GPUAtomicCounter {

    // Pointer written by cudaMalloc in the constructor.
    int *counter;

// Allocates sizeof(int) bytes with cudaMalloc and clears them with cudaMemset;
// both calls go through CHECK_CUDA (defined outside this snippet).
GPUAtomicCounter( void ) {

        CHECK_CUDA( cudaMalloc( (void**)&counter,

                              sizeof(int) ) );

        CHECK_CUDA( cudaMemset( counter, 0, sizeof(int) ) );

    }

// Frees `counter`; the return value of cudaFree is not checked here.
~GPUAtomicCounter( void ) {

        cudaFree( counter );

    }

};

// Test kernel: takes a GPUAtomicCounter by value and only copies threadIdx.x
// into a local; neither `c` nor `tx` is used afterwards.
// NOTE(review): PREC does not appear in the parameter list, so it presumably
// has to be supplied explicitly at the launch site -- confirm.
template<typename PREC>

   __global__ void branchTest_kernel(GPUAtomicCounter c){

      int tx = threadIdx.x;

   }

// Host wrapper: constructs a GPUAtomicCounter and launches branchTest_kernel
// with 1 block of 33 threads. The parameter `a` is not used in this body.
// NOTE(review): no cudaGetLastError / synchronize call follows the launch in
// this snippet, so any error would only surface in a later CUDA call.
template<typename PREC>

   __host__ void  test_kernelWrap(PREC * a){

      dim3 threads(33);

      dim3 blocks(1);

// Default-constructed here; its destructor runs when this function returns.
GPUAtomicCounter c;

       // Manual allocation of c.counter, currently disabled.
       //CHECK_CUDA( cudaMalloc( (void**)&c.counter, sizeof(int) ) );

       //CHECK_CUDA( cudaMemset( c.counter, 0, sizeof(int) ) );

// `c` is passed by value, i.e. the kernel argument is a copy of `c`.
testsKernels::branchTest_kernel<<<blocks,threads>>>(c);

   }

I get an invalid device pointer error after the CUDA kernel launch. The kernel finishes correctly, but I don't know what's wrong.

If I comment out the destructor and constructor and initialize the GPUAtomicCounter outside (below the definition of c), then the error disappears…?

Does anybody know what the problem is here?

Thanks!

Gabriel!

I suppose the structure’s destructor is called at the end of the program, when the context has already been destroyed. I think it’s a bad idea to use a destructor to deallocate device memory.

As for the allocation part, what you do appears weird to me. First, I see no need to use a separate structure. But if you just want some type checking, it is fine, because it makes no difference in terms of performance.

Second, normally we do memory allocation in this way:

// Plain-data counter; the array of counters itself lives in device memory.
struct GPUAtomicCounter{ int counter;};

// Sketch of the usual pattern: allocate the counters once with cudaMalloc,
// zero them with cudaMemset, use them, then release them with cudaFree.
// The `...` lines stand for code elided in this sketch (numberOfCounters is
// expected to be defined there). Returns 0 on success, 1 if the allocation or
// the memset fails.
int main()
{
...

// One name for the device array throughout: declaration, allocation, memset
// and free all refer to dev_counters.
GPUAtomicCounter *dev_counters = NULL;
const size_t counterBytes = sizeof(GPUAtomicCounter) * numberOfCounters;

// Check the return codes so a failed allocation is not silently carried on.
if (cudaMalloc((void**)&dev_counters, counterBytes) != cudaSuccess) {
    return 1;
}
if (cudaMemset(dev_counters, 0, counterBytes) != cudaSuccess) {
    cudaFree(dev_counters);
    return 1;
}

...

cudaFree(dev_counters);
return 0;
}