CUDA handle leakage from background thread

Hi all,

I find that every time I use a background thread to execute a CUDA function, I leak one Semaphore handle and one Mutant handle (as seen by the Microsoft Sysinternals tool ‘Handle’, http://technet.microsoft.com/en-us/sysinternals/bb896655).

Eventually, I run out of handles and my app dies.

I’m using MS Visual C++ 2008 Pro (on Windows XP, 32-bit) and the CUDA Toolkit 3.2 from January 2011. I saw the same problem with toolkit version 2.3.

Any ideas why this is happening? I enclose a code snippet below, which is also contained within the attachment.

Thanks
James

//----------cudafuncs.h-----------------
#ifndef CUDAFUNCS_H_INCLUDED
#define CUDAFUNCS_H_INCLUDED

// Selects CUDA device 0, then allocates and frees a 1000-byte buffer
// with cudaMalloc/cudaFree. Returns 0 on success.
int RunCudaTest(void);
// Calls cudaThreadExit() from the calling thread.
void ShutdownCuda(void);

#endif //CUDAFUNCS_H_INCLUDED

//---------------cudafuncs.cu-------------
#include “cudafuncs.h”

/**
 * Minimal CUDA runtime exercise: selects device 0, allocates a small
 * buffer with cudaMalloc and immediately releases it with cudaFree.
 *
 * Every runtime call is checked; the first failure aborts the sequence.
 *
 * @return 0 (cudaSuccess) when all calls succeed, otherwise the failing
 *         cudaError_t value converted to int.
 */
int
RunCudaTest(void)
{
    cudaError_t err = cudaSetDevice(0);
    if (cudaSuccess != err)
    {
        return (int)err;
    }

    unsigned char* d_pBuf = NULL;
    const size_t nBufSize = 1000;   // bytes; cudaMalloc takes a size_t
    err = cudaMalloc((void**)&d_pBuf, nBufSize);
    if (cudaSuccess != err)
    {
        // Nothing was allocated, so there is nothing to free.
        return (int)err;
    }

    err = cudaFree(d_pBuf);
    return (int)err;

}// RunCudaTest

/**
 * Tears down the CUDA runtime state of the calling thread by invoking
 * cudaThreadExit(). The status returned by that call is not reported.
 *
 * NOTE(review): later toolkits reportedly deprecate cudaThreadExit() in
 * favour of cudaDeviceReset() -- confirm before upgrading the toolkit.
 */
void ShutdownCuda(void)
{
    cudaThreadExit();
}

//-------------main.cpp-----------------------

#include
#include
#include “cudafuncs.h”
#include <windows.h>

/// Global completion flag used to wait for the background thread to finish:
/// 0 while the work is pending, 1 once WorkingFunction has completed.
/// Declared volatile because it is written by the worker thread and read
/// from another thread, so every access must go to memory.
volatile int gDone = 0;

/**
 * Function that is executed in the background thread. It calls
 * a couple of CUDA routines.
 *
 * @param pv  Thread argument; not used.
 * @return    Thread exit code: the status returned by RunCudaTest()
 *            (0 on success), so a failure is observable by whoever
 *            inspects the thread's exit code.
 */
DWORD WINAPI WorkingFunction(LPVOID pv)
{
    (void)pv;   // unused

    const int status = RunCudaTest();

    // Always shut CUDA down, even if the test itself failed.
    ShutdownCuda();

    // Signal completion last, after all CUDA work and cleanup are done.
    gDone = 1;
    return (DWORD)status;
}

/**
 * Function that invokes 'WorkingFunction' on a background thread.
 *
 * @param hThr  [out] Receives the handle of the newly created thread, or
 *              NULL if CreateThread failed. The caller owns the handle
 *              and must release it with CloseHandle.
 */
void RunInBkgThread(HANDLE& hThr)
{
    // The fifth argument of CreateThread is dwCreationFlags, not a priority:
    // 0 means "start running immediately". (The previous code passed
    // THREAD_PRIORITY_NORMAL, which only worked because it is also 0.)
    // The thread identifier is not needed, so NULL is passed for it.
    hThr = CreateThread(NULL,             // default security attributes
                        0,                // default stack size
                        WorkingFunction,  // thread entry point
                        NULL,             // no argument for the entry point
                        0,                // creation flags: run immediately
                        NULL);            // thread id not requested
}// RunInBkgThread

/**
 * Main program.
 *
 * Repeatedly runs the CUDA test on a fresh background thread so that the
 * growth in Semaphore and Mutant handles can be observed from outside.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 when every trial ran; 1 if a thread could not be created or
 *         waiting for it failed.
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const int kNumTrials = 25;

    // Go through several iterations and watch the number
    // of Semaphore and Mutant handles grow...
    for (int i = 0; i < kNumTrials; ++i)
    {
        HANDLE hThr = NULL;

        // Reset the completion flag that the worker sets when it is done.
        gDone = 0;

        RunInBkgThread(hThr);
        if (NULL == hThr)
        {
            // Thread creation failed: the worker will never run, so waiting
            // for it would hang forever.
            return 1;
        }

        // Block until the thread has really terminated instead of polling a
        // flag; this also guarantees the thread is gone before its handle is
        // closed.
        const DWORD waitResult = WaitForSingleObject(hThr, INFINITE);

        // Cleanup resources
        CloseHandle(hThr);

        if (WAIT_OBJECT_0 != waitResult)
        {
            return 1;
        }
    }// i loop over trials

    return 0;
}// main
    cudaHandleLeak.zip (3.33 KB)