Memory is not getting freed up

Hi all,

I have written a kernel that produces correct output on GTX 260 however when I open resource monitor I see physical memory usage shooting up hence eventually program crashes. So I am concerned about freeing up the memory. Does clMemReleaseObject() works correctly or there are some already known problems with it?

I am interfacing this OpenCL code with Matlab. And I have written an interface function (so called mex function) that makes the OpenCL context, initializes device variables and calls the kernel. In the end it frees up the variable using clReleaseMemObject(). Could someone please tell what if I am not freeing up the memory correctly? Also sometime sometime the code does not detect the GPU at all. It returns with the following error code:

“Error: Creating Context. (clCreateContextFromType) status is -2”

-2 is the number for CL_DEVICE_NOT_AVAILABLE

However if I keep on running it again and again it will pick up the device, hence i am kind of lost. So basically I have two problems:

  1. Memory leak when I run the GPU code

  2. clCreateContextFromType() fail intermittently with error code -2 (CL_DEVICE_NOT_AVAILABLE)

Could someone kindly check if my variable initialization is correct? The output of the kernel is a a variable LLR that I am reading back using clEnqueueReadBuffer(), is doing that correct? Is there a better way of initializing variables?? Am I not freeing up the memory correctly?? Any help would be highly appreciated.

System config:

SDK 4.0

Windows 7 64 bit

Geforce GTX 260

Matlab R2010a

Regards,

Richeek

#include <string.h>

#include"cl_resources.h"

#ifdef _CHAR16T

#define CHAR16_T

#endif

#include "mex.h"

#include <string.h>

#include"cl_resources.h"

#ifdef _CHAR16T

#define CHAR16_T

#endif

#include "mex.h"

void mexFunction (int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])

{

  cl_int status;  

  /* Initialize OpenCL resoruces */

  status = initializeCL();

  if(status != CL_SUCCESS)

   return;

// input variables

  float *R_re, *R_im, *dist_ZF,*s_re, *s_im ,  *symbol_alphabet_re, *symbol_alphabet_im;

  bool *bittable;

  int kk, nR, nT, nSym, *M, *symbols_ZF_i, total_bits,Sum_M,max_2_M;

size_t size;

  cl_event events[2], ev;

  // output variables

  float *LLR;

if (nrhs!=7)

    mexErrMsgTxt("7 input arguments required \n\n"

             "[LLR] = soft_sd(R,s,dist_ZF,symbols_ZF,symbol_alphabet,bittable) ... soft Sphere Decoder\n\n"

             "  R ... upper triangular matrix obtained from the QR decomposition of the channel H (complex)\n"

             "  s ... received symbol vector, s=Q^H*y (nR x nSym) (complex)\n"

                 "  dist_ZF ... Distance of the zero forcing solution (real)\n"

                 "  symbols_ZF_i ... indices to symbols of the ZF solution (nT x nSym) (real integer)\n"

                 "  M ... number of bits in the corresponding layer (1 x nR) (real)\n"

             "  symbol_alphabet ... for the demapping (2^M_max x nT) (complex)\n"

             "  bittable ... matrix containing the bits according to the symbol_alphabet (M x 2^M) (logical)\n"

             "  LLR  ... max-log-MAP approximation of the LLR values (M*nR) (real)\n\n");

  if (nlhs>1) 

    mexErrMsgTxt("One output lefthand argument required \n");

// check input variables

  if ( ! mxIsComplex(prhs[0]) )

    mexErrMsgTxt("1st argument 'R' must be a complex-valued (nR x nT) matrix");

  if ( ! mxIsComplex(prhs[1]) )

    mexErrMsgTxt("2nd argument 's' must be a complex-valued (nR x nSym) matrix");

  if (   mxIsComplex(prhs[2]) )

    mexErrMsgTxt("3rd argument 'dist_ZF' must be a real-valued (1 x nSym) matrix");

  if (   mxIsComplex(prhs[3]) )

    mexErrMsgTxt("4th argument 'symbols_ZF_i' must be a real-valued (nT x nSym) integer matrix");

  if (   mxIsComplex(prhs[4]) )

    mexErrMsgTxt("5th argument 'M' must be a real-valued (1 x nT) integer matrix");

  if ( ! mxIsComplex(prhs[5]) )

    mexErrMsgTxt("6th argument 'symbol_alphabet' must be a complex-valued (2^M_max x nT) matrix");

  if ( ! mxIsLogical(prhs[6]) )

    mexErrMsgTxt("7th argument 'bittable' must be a logical (M x 2^M) matrix");

nR      = mxGetM(prhs[0]);    //  number of receive antennas

    nT      = mxGetN(prhs[0]);    //  number of transmit antennas

    nSym    = mxGetN(prhs[1]);    //  Block size (number of transmitted symbol vectors)

   Sum_M   = mxGetM(prhs[6]);     //  sum of the number of bits of M   

   max_2_M = mxGetN(prhs[6]);    //  Maximum value of 2^M

// fetch input variables

    R_re = (float *)(mxGetPr(prhs[0]));                    // fetch pointer to real part of R

   R_im = (float *)(mxGetPi(prhs[0]));                    // fetch pointer to imag part of R

    s_re = (float *)(mxGetPr(prhs[1]));                    // fetch pointer to real part of s

   s_im = (float *)(mxGetPi(prhs[1]));                    // fetch pointer to imag part of s

    dist_ZF = (float *)(mxGetPr(prhs[2]));                 // fetch ZF distance

   symbols_ZF_i = (int *)(mxGetPr(prhs[3]));               // fetch pointer to imag part of ZF solution indices

   M = (int *)(mxGetPr(prhs[4]));                          // fetch pointer to number of bits vector

    symbol_alphabet_re = (float *)(mxGetPr(prhs[5]));      // fetch pointer to real part of symbol alphabet

   symbol_alphabet_im = (float *)(mxGetPi(prhs[5]));      // fetch pointer to imag part of symbol alphabet

    bittable = (bool *)(mxGetPr(prhs[6]));                  // fetch pointer to real part of bit mapping table

// allocate memory for output variables

    total_bits = 0;

    for(kk=0; kk<nT; kk++)

        total_bits += M[kk]; 

size_t total_size = 0;

   // Allocate Variables on the Device Global Memory of GPU and Load the input variables to the device

   cl_mem R_re_d;

   size = nR * nT * sizeof(float);

   total_size += size;

   R_re_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return ;

    }

float* R_re_p;

   R_re_p = (float *)clEnqueueMapBuffer(commandQueue,R_re_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(R_re_p, R_re, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,R_re_d,(void *)R_re_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed R_re_d\n");

        return;

   }

   cl_mem s_re_d;

   size = nT * nSym * sizeof(float);

   total_size += size;

s_re_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

float *s_re_p;

   s_re_p = (float *)clEnqueueMapBuffer(commandQueue,s_re_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(s_re_p, s_re, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,s_re_d,(void*)s_re_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed s_re_d\n");

        return;

   }

   cl_mem s_im_d;

   size = nT * nSym * sizeof(float);

   total_size += size;

   s_im_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

float *s_im_p;

   s_im_p = (float *)clEnqueueMapBuffer(commandQueue,s_im_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(s_im_p, s_im, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,s_im_d,(void *)s_im_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() s_im failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed s_im_d\n");

        return;

   }

   cl_mem R_im_d;

   size = nR * nT * sizeof(float);

   total_size += size;

   R_im_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return ;

    }

float *R_im_p;

   R_im_p = (float *)clEnqueueMapBuffer(commandQueue,R_im_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(R_im_p, R_im, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,R_im_d,(void *)R_im_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed R_im with status %d\n", status);

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed R_im_d\n");

        return;

   }

   cl_mem dist_ZF_d;

   size = 1 * nSym * sizeof(float);

   dist_ZF_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   total_size += size;

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

   float *dist_ZF_p;

   dist_ZF_p = (float *)clEnqueueMapBuffer(commandQueue,dist_ZF_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(dist_ZF_p, dist_ZF, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,dist_ZF_d,(void *)dist_ZF_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed dist_ZF_d\n");

        return;

   }

cl_mem symbols_ZF_index_d;

   size = nT * nSym * sizeof(int);

   symbols_ZF_index_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   total_size += size;

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

int *symbols_ZF_index_p;

   symbols_ZF_index_p = (int*)clEnqueueMapBuffer(commandQueue,symbols_ZF_index_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(symbols_ZF_index_p, symbols_ZF_i, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,symbols_ZF_index_d,(void*)symbols_ZF_index_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed symbols_ZF_index_p\n");

        return;

   }

cl_mem M_d;

   size = nT * 1 * sizeof(int);

   M_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   total_size += size;

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

int *M_p;

   M_p = (int*)clEnqueueMapBuffer(commandQueue,M_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(M_p, M, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue, M_d,(void *)M_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed M_d\n");

        return;

   }

cl_mem symbol_alphabet_re_d;

   size = nT * max_2_M * sizeof(float);

   total_size += size;

   symbol_alphabet_re_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

float *symbol_alphabet_re_p;

   symbol_alphabet_re_p = (float *)clEnqueueMapBuffer(commandQueue,symbol_alphabet_re_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(symbol_alphabet_re_p, symbol_alphabet_re, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,symbol_alphabet_re_d,(void *)symbol_alphabet_re_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed symbol_alphabet_re_d\n");

        return;

   }

cl_mem symbol_alphabet_im_d;

   size = nT * max_2_M * sizeof(float);

   symbol_alphabet_im_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   total_size += size;

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

float *symbol_alphabet_im_p;

   symbol_alphabet_im_p = (float *)clEnqueueMapBuffer(commandQueue,symbol_alphabet_im_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(symbol_alphabet_im_p, symbol_alphabet_im, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,symbol_alphabet_im_d,(void *)symbol_alphabet_im_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed symbol_alphabet_im_d\n");

        return;

   }

cl_mem bittable_d;

   size = Sum_M * max_2_M * sizeof(bool);

   bittable_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   total_size += size;

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

       // return;

    }

bool *bittable_p;

   bittable_p = (bool*)clEnqueueMapBuffer(commandQueue,bittable_d,CL_TRUE,CL_MAP_WRITE,0,size,0,NULL,NULL,&status);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: clEnqueueMapBuffer \n");

      return;

   }

   memcpy(bittable_p, bittable, size);

      /* Load the data back on the GPU */

   status = clEnqueueUnmapMemObject(commandQueue,bittable_d,(void*)bittable_p,0,NULL,&ev);

    if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() failed bittable\n");

        return;

   }

   status = clWaitForEvents(1, &ev);

   if(status != CL_SUCCESS)

   {

        mexPrintf("clEnqueueUnmapMemObject() Release failed bittable_d\n");

        return;

   }

/* This is the output */

   cl_mem LLR_d;

   size = Sum_M *nSym * sizeof(float);

   total_size += size;

   LLR_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);

   if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Setting kernel argument. \n");

        return;

    }

   //mexPrintf("Total allocated memory is %d\n", total_size);

   /* Set kernel Arguments */

/*** Set appropriate arguments to the kernel ***/

status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&R_re_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument.\n");

      return;

   }

   status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&R_im_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&s_re_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&s_im_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&dist_ZF_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&symbols_ZF_index_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&symbol_alphabet_re_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *)&symbol_alphabet_im_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *)&bittable_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *)&LLR_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 10, sizeof(int), (void *)&nSym);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *)&M_d);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 12, sizeof(int), (void *)&nT);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 13, sizeof(int), (void *)&nR);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

   status = clSetKernelArg(kernel, 14, sizeof(int), (void *)&total_bits);

   if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Setting kernel argument\n");

      return;

   }

size_t localThreads[2] = {128, 1};

   size_t globalThreads[2] = {nSym, 1};

status = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, &events[0]);

if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)\n");

      return;

   }

/* wait for the kernel call to finish execution */

    status = clWaitForEvents(1, &events[0]);

    if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Waiting for kernel run to finish.(clWaitForEvents)\n");

      return;

   }

// output variable with Sphere Decoder solution of single precision type

    plhs [0] = mxCreateNumericMatrix(total_bits,nSym,mxSINGLE_CLASS,mxREAL);

if(plhs[0] == NULL)

      mexErrMsgTxt("mxCreateNumericMatrix failed(1)\n");

LLR  = (float *) mxGetPr(plhs[0]);                    // fetch pointer for output variable

    if(LLR == NULL ) 

      mexErrMsgTxt("mxCreateNumericMatrix failed(2)\n");

//Copy the data back from the GPU 

   size = Sum_M *nSym * sizeof(float);

   status = clEnqueueReadBuffer(commandQueue,LLR_d,CL_TRUE,0,size,LLR,0,0,&ev);

   if(status != CL_SUCCESS)

   {

      mexPrintf("Error in reading LLR buffer Status is: %d size is(bytes) %d\n", status, size);

   }

   status = clWaitForEvents(1, &ev);

    if(status != CL_SUCCESS) 

   { 

      mexPrintf("Error: Waiting for LLR read to finish.(clWaitForEvents) status is \n", status);

      return;

   }

//free up the host memory

   //free(R_re_p);

   //R_re_p = NULL;

   //free(R_im_p);

   //R_im_p = NULL;

   //free(s_re_p);

   //s_re_p = NULL;

   //free(s_im_p);

   //s_im_p = NULL;

   //free(symbols_ZF_index_p);

   //symbols_ZF_index_p = NULL;

   //free(M_p);

   //M_p = NULL;

   //free(symbol_alphabet_re_p);

   //symbol_alphabet_re_p = NULL;

   //free(symbol_alphabet_im_p);

   //symbol_alphabet_im_p = NULL;

   //free(bittable_p);

   //bittable_p = NULL;

// Free device memory

   status = 0;

   status = clReleaseKernel(kernel);

    status += clReleaseProgram(program);

    status += clReleaseMemObject(R_re_d);

   status += clReleaseMemObject(R_im_d);

   status += clReleaseMemObject(s_re_d);

   status += clReleaseMemObject(s_im_d);

   status += clReleaseMemObject(dist_ZF_d);

   status += clReleaseMemObject(symbols_ZF_index_d);

   status += clReleaseMemObject(M_d);

   status += clReleaseMemObject(symbol_alphabet_re_d);

   status += clReleaseMemObject(symbol_alphabet_im_d);

   status += clReleaseMemObject(bittable_d);

   status += clReleaseMemObject(LLR_d);

status += clReleaseCommandQueue(commandQueue);

    status += clReleaseContext(context);

if(status != 0)

      mexPrintf("Error in freeing up the memory");

}

And this is the OpenCL initialization code:

#include "cl_resources.h"

#ifdef _CHAR16T

#define CHAR16_T

#endif

#include "mex.h"

cl_context          context;

cl_device_id        *devices;

cl_command_queue    commandQueue;

cl_program program;

cl_kernel  kernel;

/*

 * Converts the contents of a file into a string

*/

int convertToString(const char *filename, std::string& s)

{

    size_t size;

    char*  str;

std::fstream f(filename, (std::fstream::in | std::fstream::binary));

if(f.is_open())

    {

        size_t fileSize;

        f.seekg(0, std::fstream::end);

        size = fileSize = (size_t)f.tellg();

        f.seekg(0, std::fstream::beg);

str = new char;

        if(!str)

        {

            f.close();

            return NULL;

        }

f.read(str, fileSize);

        f.close();

        str = '

#include “cl_resources.h”

#ifdef _CHAR16T

#define CHAR16_T

#endif

#include “mex.h”

cl_context context;

cl_device_id *devices;

cl_command_queue commandQueue;

cl_program program;

cl_kernel kernel;

/*

  • Converts the contents of a file into a string

*/

int convertToString(const char *filename, std::string& s)

{

size_t size;

char*  str;

std::fstream f(filename, (std::fstream::in | std::fstream::binary));

if(f.is_open())

{

    size_t fileSize;

    f.seekg(0, std::fstream::end);

    size = fileSize = (size_t)f.tellg();

    f.seekg(0, std::fstream::beg);

str = new char;

    if(!str)

    {

        f.close();

        return NULL;

    }

f.read(str, fileSize);

    f.close();

    str = '\0';

s = str;

    delete[] str;

    return 0;

}

mexPrintf("Error: Failed to open file %s\n", filename);

return 1;

}

/*

  • \brief OpenCL related initialization

  •    Create Context, Device list, Command Queue
    
  •    Create OpenCL memory buffer objects
    
  •    Load CL file, compile, link CL source 
    
  •    Build program and kernel objects
    

*/

int initializeCL(void)

{

cl_int status = 0;

size_t deviceListSize;

cl_uint numPlatforms;

cl_platform_id platform = NULL;

status = clGetPlatformIDs(0, NULL, &numPlatforms);

if(status != CL_SUCCESS)

{

    mexPrintf("Error: Getting Platforms. (clGetPlatformsIDs)\n");

    return 1;

}

if(numPlatforms > 0)

{

    cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));

    status = clGetPlatformIDs(numPlatforms, platforms, NULL);

    if(status != CL_SUCCESS)

    {

        mexPrintf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");

        return 1;

    }

    for(unsigned int i=0; i < numPlatforms; ++i)

    {

        char pbuff[100];

        status = clGetPlatformInfo(

                    platforms[i],

                    CL_PLATFORM_VENDOR,

                    sizeof(pbuff),

                    pbuff,

                    NULL);

        if(status != CL_SUCCESS)

        {

            mexPrintf("Error: Getting Platform Info. (clGetPlatformInfo)\n");

            return 1;

        }

        platform = platforms[i];

        if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))

        {

            break;

        }

    }

    delete platforms;

}

if(NULL == platform)

{

    std::cout << "NULL platform found so Exiting Application." << std::endl;

    return 1;

}

cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

/////////////////////////////////////////////////////////////////

// Create an OpenCL context

/////////////////////////////////////////////////////////////////

context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);

if(status != CL_SUCCESS) 

{  

    mexPrintf("Error: Creating Context. (clCreateContextFromType) status is %d\n", status);

    return 1; 

}

/* First, get the size of device list data */

status = clGetContextInfo(context, 

                          CL_CONTEXT_DEVICES, 

                          0, 

                          NULL, 

                          &deviceListSize);

if(status != CL_SUCCESS) 

{  

    mexPrintf(

        "Error: Getting Context Info \

        (device list size, clGetContextInfo)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Detect OpenCL devices

/////////////////////////////////////////////////////////////////

devices = (cl_device_id *)malloc(deviceListSize);

if(devices == 0)

{

    mexPrintf("Error: No devices found.\n");

    return 1;

}

/* Now, get the device list data */

status = clGetContextInfo(

             context, 

             CL_CONTEXT_DEVICES, 

             deviceListSize, 

             devices, 

             NULL);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Getting Context Info \

        (device list, clGetContextInfo)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Create an OpenCL command queue

/////////////////////////////////////////////////////////////////

commandQueue = clCreateCommandQueue(

                   context, 

                   devices[0], 

                   0, 

                   &status);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Creating Command Queue. (clCreateCommandQueue)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Load CL file, build CL program object, create CL kernel object

/////////////////////////////////////////////////////////////////

const char * filename  = "sts_kernel.cl";

std::string  sourceStr;

status = convertToString(filename, sourceStr);

if(status != CL_SUCCESS)

    return 1;

const char * source = sourceStr.c_str();

size_t sourceSize[]    = { strlen(source) };

program = clCreateProgramWithSource(

              context, 

              1, 

              &source,

              sourceSize,

              &status);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Loading Binary into cl_program \

           (clCreateProgramWithBinary)\n");

  return 1;

}

/* create a cl program executable for all the devices specified */

char* build_log;

size_t log_size;

status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Building Program (clBuildProgram)\n");

	// First call to know the proper size

	clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

	build_log = new char[log_size+1];

	// Second call to get the log

	clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);

	build_log[log_size] = '\0';

	mexPrintf("%s\n", build_log);

	delete[] build_log;

    return 1; 

}

/* get a kernel object handle for a kernel with the given name */

kernel = clCreateKernel(program, "sts_kernel", &status);

if(status != CL_SUCCESS) 

{  

    mexPrintf("Error: Creating Kernel from program. (clCreateKernel)\n");

    return 1;

}

return 0;

}

';

s = str;

        delete[] str;

        return 0;

    }

    mexPrintf("Error: Failed to open file %s\n", filename);

    return 1;

}

/*

 * \brief OpenCL related initialization 

 *        Create Context, Device list, Command Queue

 *        Create OpenCL memory buffer objects

 *        Load CL file, compile, link CL source 

 *		  Build program and kernel objects

 */

int initializeCL(void)

{

    cl_int status = 0;

size_t deviceListSize;

cl_uint numPlatforms;

    cl_platform_id platform = NULL;

    status = clGetPlatformIDs(0, NULL, &numPlatforms);

    if(status != CL_SUCCESS)

    {

        mexPrintf("Error: Getting Platforms. (clGetPlatformsIDs)\n");

        return 1;

    }

if(numPlatforms > 0)

    {

        cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));

        status = clGetPlatformIDs(numPlatforms, platforms, NULL);

        if(status != CL_SUCCESS)

        {

            mexPrintf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");

            return 1;

        }

        for(unsigned int i=0; i < numPlatforms; ++i)

        {

            char pbuff[100];

            status = clGetPlatformInfo(

                        platforms[i],

                        CL_PLATFORM_VENDOR,

                        sizeof(pbuff),

                        pbuff,

                        NULL);

            if(status != CL_SUCCESS)

            {

                mexPrintf("Error: Getting Platform Info. (clGetPlatformInfo)\n");

                return 1;

            }

            platform = platforms[i];

            if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))

            {

                break;

            }

        }

        delete platforms;

    }

if(NULL == platform)

    {

        std::cout << "NULL platform found so Exiting Application." << std::endl;

        return 1;

    }

cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

/////////////////////////////////////////////////////////////////

    // Create an OpenCL context

    /////////////////////////////////////////////////////////////////

    context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);

    if(status != CL_SUCCESS) 

    {  

        mexPrintf("Error: Creating Context. (clCreateContextFromType) status is %d\n", status);

        return 1; 

    }

/* First, get the size of device list data */

    status = clGetContextInfo(context, 

                              CL_CONTEXT_DEVICES, 

                              0, 

                              NULL, 

                              &deviceListSize);

    if(status != CL_SUCCESS) 

    {  

        mexPrintf(

            "Error: Getting Context Info \

            (device list size, clGetContextInfo)\n");

        return 1;

    }

/////////////////////////////////////////////////////////////////

    // Detect OpenCL devices

    /////////////////////////////////////////////////////////////////

    devices = (cl_device_id *)malloc(deviceListSize);

    if(devices == 0)

    {

        mexPrintf("Error: No devices found.\n");

        return 1;

    }

/* Now, get the device list data */

    status = clGetContextInfo(

                 context, 

                 CL_CONTEXT_DEVICES, 

                 deviceListSize, 

                 devices, 

                 NULL);

    if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Getting Context Info \

            (device list, clGetContextInfo)\n");

        return 1;

    }

/////////////////////////////////////////////////////////////////

    // Create an OpenCL command queue

    /////////////////////////////////////////////////////////////////

    commandQueue = clCreateCommandQueue(

                       context, 

                       devices[0], 

                       0, 

                       &status);

    if(status != CL_SUCCESS) 

    { 

        mexPrintf("Creating Command Queue. (clCreateCommandQueue)\n");

        return 1;

    }

/////////////////////////////////////////////////////////////////

    // Load CL file, build CL program object, create CL kernel object

    /////////////////////////////////////////////////////////////////

    const char * filename  = "sts_kernel.cl";

    std::string  sourceStr;

    status = convertToString(filename, sourceStr);

    if(status != CL_SUCCESS)

        return 1;

const char * source    = sourceStr.c_str();

    size_t sourceSize[]    = { strlen(source) };

program = clCreateProgramWithSource(

                  context, 

                  1, 

                  &source,

                  sourceSize,

                  &status);

    if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Loading Binary into cl_program \

               (clCreateProgramWithBinary)\n");

      return 1;

    }

/* create a cl program executable for all the devices specified */

	char* build_log;

	size_t log_size;

    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

    if(status != CL_SUCCESS) 

    { 

        mexPrintf("Error: Building Program (clBuildProgram)\n");

		// First call to know the proper size

		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

		build_log = new char[log_size+1];

		// Second call to get the log

		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);

		build_log[log_size] = '

#include “cl_resources.h”

#ifdef _CHAR16T

#define CHAR16_T

#endif

#include “mex.h”

cl_context context;

cl_device_id *devices;

cl_command_queue commandQueue;

cl_program program;

cl_kernel kernel;

/*

  • Converts the contents of a file into a string

*/

int convertToString(const char *filename, std::string& s)

{

size_t size;

char*  str;

std::fstream f(filename, (std::fstream::in | std::fstream::binary));

if(f.is_open())

{

    size_t fileSize;

    f.seekg(0, std::fstream::end);

    size = fileSize = (size_t)f.tellg();

    f.seekg(0, std::fstream::beg);

str = new char;

    if(!str)

    {

        f.close();

        return NULL;

    }

f.read(str, fileSize);

    f.close();

    str = '\0';

s = str;

    delete[] str;

    return 0;

}

mexPrintf("Error: Failed to open file %s\n", filename);

return 1;

}

/*

  • \brief OpenCL related initialization

  •    Create Context, Device list, Command Queue
    
  •    Create OpenCL memory buffer objects
    
  •    Load CL file, compile, link CL source 
    
  •    Build program and kernel objects
    

*/

int initializeCL(void)

{

cl_int status = 0;

size_t deviceListSize;

cl_uint numPlatforms;

cl_platform_id platform = NULL;

status = clGetPlatformIDs(0, NULL, &numPlatforms);

if(status != CL_SUCCESS)

{

    mexPrintf("Error: Getting Platforms. (clGetPlatformsIDs)\n");

    return 1;

}

if(numPlatforms > 0)

{

    cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));

    status = clGetPlatformIDs(numPlatforms, platforms, NULL);

    if(status != CL_SUCCESS)

    {

        mexPrintf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");

        return 1;

    }

    for(unsigned int i=0; i < numPlatforms; ++i)

    {

        char pbuff[100];

        status = clGetPlatformInfo(

                    platforms[i],

                    CL_PLATFORM_VENDOR,

                    sizeof(pbuff),

                    pbuff,

                    NULL);

        if(status != CL_SUCCESS)

        {

            mexPrintf("Error: Getting Platform Info. (clGetPlatformInfo)\n");

            return 1;

        }

        platform = platforms[i];

        if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))

        {

            break;

        }

    }

    delete platforms;

}

if(NULL == platform)

{

    std::cout << "NULL platform found so Exiting Application." << std::endl;

    return 1;

}

cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

/////////////////////////////////////////////////////////////////

// Create an OpenCL context

/////////////////////////////////////////////////////////////////

context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);

if(status != CL_SUCCESS) 

{  

    mexPrintf("Error: Creating Context. (clCreateContextFromType) status is %d\n", status);

    return 1; 

}

/* First, get the size of device list data */

status = clGetContextInfo(context, 

                          CL_CONTEXT_DEVICES, 

                          0, 

                          NULL, 

                          &deviceListSize);

if(status != CL_SUCCESS) 

{  

    mexPrintf(

        "Error: Getting Context Info \

        (device list size, clGetContextInfo)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Detect OpenCL devices

/////////////////////////////////////////////////////////////////

devices = (cl_device_id *)malloc(deviceListSize);

if(devices == 0)

{

    mexPrintf("Error: No devices found.\n");

    return 1;

}

/* Now, get the device list data */

status = clGetContextInfo(

             context, 

             CL_CONTEXT_DEVICES, 

             deviceListSize, 

             devices, 

             NULL);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Getting Context Info \

        (device list, clGetContextInfo)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Create an OpenCL command queue

/////////////////////////////////////////////////////////////////

commandQueue = clCreateCommandQueue(

                   context, 

                   devices[0], 

                   0, 

                   &status);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Creating Command Queue. (clCreateCommandQueue)\n");

    return 1;

}

/////////////////////////////////////////////////////////////////

// Load CL file, build CL program object, create CL kernel object

/////////////////////////////////////////////////////////////////

const char * filename  = "sts_kernel.cl";

std::string  sourceStr;

status = convertToString(filename, sourceStr);

if(status != CL_SUCCESS)

    return 1;

const char * source = sourceStr.c_str();

size_t sourceSize[]    = { strlen(source) };

program = clCreateProgramWithSource(

              context, 

              1, 

              &source,

              sourceSize,

              &status);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Loading Binary into cl_program \

           (clCreateProgramWithBinary)\n");

  return 1;

}

/* create a cl program executable for all the devices specified */

char* build_log;

size_t log_size;

status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

if(status != CL_SUCCESS) 

{ 

    mexPrintf("Error: Building Program (clBuildProgram)\n");

	// First call to know the proper size

	clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

	build_log = new char[log_size+1];

	// Second call to get the log

	clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);

	build_log[log_size] = '\0';

	mexPrintf("%s\n", build_log);

	delete[] build_log;

    return 1; 

}

/* get a kernel object handle for a kernel with the given name */

kernel = clCreateKernel(program, "sts_kernel", &status);

if(status != CL_SUCCESS) 

{  

    mexPrintf("Error: Creating Kernel from program. (clCreateKernel)\n");

    return 1;

}

return 0;

}

';

		mexPrintf("%s\n", build_log);

		delete[] build_log;

        return 1; 

    }

/* get a kernel object handle for a kernel with the given name */

    kernel = clCreateKernel(program, "sts_kernel", &status);

    if(status != CL_SUCCESS) 

    {  

        mexPrintf("Error: Creating Kernel from program. (clCreateKernel)\n");

        return 1;

    }

return 0;

}

Both these codes are fairly simple all they are doing is setting up the environment to run the kernel. Again thanks for the help.

Ok…
I had an array of booleans in the code(bittable) that I was passing to the kernel. Apparently you can not pass bool to the openCL kernel. Please correct me if I am wrong. I changed this array to char and now the code gives perfect outputs on ATI Radeon 5450 graphics card without any memory leaks.
However, on GTX side it also given correct output. But why do I have memory eaten up by Matlab and intermittent OpenCL call failures?? I really appreciate any help in this regard.

Is Matlab + OpenCL + NVIDIA combination supported? I basically changed the mexopts.bat file and added cl.h and OpenCL.lib files’ location in it. It works on ATI and program compiles and gives correct output on GTX too. But could it be creating a problem wrt memory leak and OpenCL call failures?