cudaFree(gpuPtrs.p1);
cudaFree(gpuPtrs.p2);
cudaFree(gpuPtrs.p3);
// up to 21 pointers
}
// Runs 20 passes. Each pass declares a fresh GPUPtrs, hands it to
// GPUMemAlloc, ComputeData and GPUFree in that order, then discards it.
// Returns 1 after the loop; no other return path is visible in this snippet.
int StartCompute()
{
// The loop index i only counts passes; it is not used inside the body.
for( int i=0; i<20; ++i )
{
// for every iteration, I have to call GPUMemAlloc() and GPUFree();
// NOTE(review): presumably GPUMemAlloc performs the device allocations and
// GPUFree releases them (neither definition is fully visible here). If so,
// any pointer allocated but not freed leaks once per pass, 20 times over.
// TODO confirm whether the allocation can be hoisted out of the loop.
GPUPtrs gpuPtrs;
GPUMemAlloc(gpuPtrs);
ComputeData(gpuPtrs); // Here it calls the 22 kernels
// NOTE(review): nothing returned by the three calls in this loop is
// inspected, so a failed allocation or kernel error would go unnoticed
// here. Verify how errors are reported by these helpers.
GPUFree(gpuPtrs);
}
return 1;
Are you absolutely certain that you are freeing all of the device memory you are allocating? Generally speaking, having memory allocation and deallocation inside a loop like that isn’t a very good idea. Apart from the performance hit, it makes it pretty easy to introduce a memory leak, which would produce exactly the symptoms you are seeing.