Program crash when copying data onto device

Hey there,

I’m completely new to OpenCL and I can’t find an existing thread about my issue.

My problem is that I have a single method in which I create a kernel, run it, and delete it (maybe not the smartest way, but that’s only for the first tests). The method itself is called several times (currently approx. 30), but so far I don’t access any data after I free the memory.

My program crashes appear to be random. Sometimes it crashes on the first execution of move, sometimes it runs through nicely, and sometimes it crashes somewhere in between. I don’t get any error code (except code 0) or message; the program window just closes and that’s it. All I can tell is that it always happens when copying data onto my graphics card.

I really tried everything I could imagine! :(

Hopefully that’s just a beginner’s issue.

Here is the host method:

void move(

         const float *srcPos, float *destPos,

         size_t numVertices)

      {         

         cl_context context = 0;

         cl_command_queue commandQueue = 0;

         cl_program program = 0;

         cl_device_id device = 0;

         cl_kernel kernel = 0;

         cl_int errNum;

// Create an OpenCL context on first available platform

         context = CreateContext();

// Create a command-queue on the first device available

         // on the created context

         commandQueue = CreateCommandQueue(context, &device);

char* kernelPath = "move.cl";         

program = CreateProgram(context, device, kernelPath);

// Create OpenCL kernel

         kernel = clCreateKernel(program, "move_kernel", NULL);         

// Create memory objects that will be used as arguments to kernel.

         float* result = (float*)malloc(numVertices * sizeof(float));

cl_mem d_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY,

                                 sizeof(float) * numVertices, NULL, &errNum);   

cl_mem d_srcPos = clCreateBuffer( context, CL_MEM_READ_ONLY,

                                 sizeof(float) * numVertices * 3, (void*)srcPos, &errNum);      

//#########################

         // This is the call that causes the crash

         //#########################

         errNum = clEnqueueWriteBuffer(commandQueue, d_srcPos, CL_TRUE, 0, sizeof(float) * numVertices * 3,

                           (void*)srcPos, 0, NULL, NULL);

         //#########################

         if (errNum != CL_SUCCESS)

         {

            std::cerr << "Error setting kernel argument." << std::endl;

            std::cout << "Error code: " << errNum << std::endl;   

            std::getchar();

            return;

         }

cl_mem d_destPos = clCreateBuffer( context, CL_MEM_WRITE_ONLY,

                                 sizeof(float) * numVertices * 3, NULL, &errNum);

errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_srcPos);

         errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_destPos);

         errNum |= clSetKernelArg(kernel, 2, sizeof(int), &numVertices);

         errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_result);

if (errNum != CL_SUCCESS)

         {

            std::cerr << "Error setting kernel arguments." << std::endl;

            std::cout << "Error code: " << errNum << std::endl;

            Cleanup(context, commandQueue, program, kernel, memObjects);

            std::getchar();

            return;

         }

size_t localWorkSize[1] = { 1 };

         size_t globalWorkSize[1] = { numVertices };

// Queue the kernel up for execution across the array

         errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,

                                 globalWorkSize, localWorkSize,

                                 0, NULL, NULL);

// Read the output buffer back to the Host      

         errNum = clEnqueueReadBuffer(commandQueue, d_result, CL_TRUE,

                               0, numVertices * sizeof(float), result,

                               0, NULL, NULL);

std::cout << "Executed program succesfully." << std::endl;

free(result);

clReleaseMemObject(d_result);

         clReleaseMemObject(d_srcPos);

         clReleaseMemObject(d_destPos);

clReleaseCommandQueue(commandQueue);

clReleaseKernel(kernel);

clReleaseProgram(program);

clReleaseContext(context);

}

The kernel so far looks like this:

// Test kernel: every work-item stores its own global index (converted to
// float) into d_result at that index. d_srcPos, d_destPos and numVertices
// are accepted but not used yet.
__kernel void move_kernel(__global const float *d_srcPos,
                          __global float *d_destPos,
                          int numVertices,
                          __global float *d_result)
{
    const int workItem = (int)get_global_id(0);

    d_result[workItem] = (float)workItem;
}

I also tried to use

cl_mem d_srcPos = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * numVertices * 3, (void*)srcPos, &errNum);

instead, but that causes the same behavior.

A lot of the code is still from some example file, which worked perfectly, but since I added this mem-copy function I get the described crashes.

Since I’m still working on it, the variable result is just a test variable, that I sometimes use to output values (e.g. get_global_id(), or such).

For the sake of readability I also deleted all the error handling like

if(kernel == NULL)

because I really tested everything and the only thing that causes the crash is the marked function call.

Does anyone have an idea what could be causing my problem?

Thanks in advance!

Cheers,

–Markus

My Device:

Quadro 5000

Driver 285.58

PS: I also posted in the Khronos forum, where I got the idea that it could also be a driver-related issue.

Here is what works for me.

float *d_srcPos;

// Two buffers: a staging buffer allocated by the OpenCL implementation in
// host-accessible memory (CL_MEM_ALLOC_HOST_PTR), and a plain device buffer
// that the kernel reads from.
cl_mem Pinned_d_srcPos = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float) * numVertices * 3, NULL, NULL);
cl_mem Dev_d_srcPos = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * numVertices * 3, NULL, NULL);

// Blocking map: d_srcPos becomes a host pointer into the staging buffer,
// which the application fills with the source positions.
d_srcPos = (float*) clEnqueueMapBuffer(commandQueue, Pinned_d_srcPos, CL_TRUE, CL_MAP_WRITE, 0, sizeof(float) * numVertices * 3, 0, NULL, NULL, NULL);

// The kernel argument is the device buffer (the identifier was misspelled
// as Devd_srcPos in the original snippet, which does not compile).
clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &Dev_d_srcPos);

//whenever d_srcPos has fresh data
// Blocking copy from the mapped staging memory into the device buffer.
clEnqueueWriteBuffer(commandQueue, Dev_d_srcPos, CL_TRUE, 0, sizeof(float) * numVertices * 3, d_srcPos, 0, NULL, NULL);

Here is what works for me.

float *d_srcPos;

// Two buffers: a staging buffer allocated by the OpenCL implementation in
// host-accessible memory (CL_MEM_ALLOC_HOST_PTR), and a plain device buffer
// that the kernel reads from.
cl_mem Pinned_d_srcPos = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float) * numVertices * 3, NULL, NULL);
cl_mem Dev_d_srcPos = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * numVertices * 3, NULL, NULL);

// Blocking map: d_srcPos becomes a host pointer into the staging buffer,
// which the application fills with the source positions.
d_srcPos = (float*) clEnqueueMapBuffer(commandQueue, Pinned_d_srcPos, CL_TRUE, CL_MAP_WRITE, 0, sizeof(float) * numVertices * 3, 0, NULL, NULL, NULL);

// The kernel argument is the device buffer (the identifier was misspelled
// as Devd_srcPos in the original snippet, which does not compile).
clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &Dev_d_srcPos);

//whenever d_srcPos has fresh data
// Blocking copy from the mapped staging memory into the device buffer.
clEnqueueWriteBuffer(commandQueue, Dev_d_srcPos, CL_TRUE, 0, sizeof(float) * numVertices * 3, d_srcPos, 0, NULL, NULL);