clBuildProgram crash with varying kernel args

I have a kernel which I have defined by


__kernel void ptop(__global int *a, __global int *b, __global int *c)
size_t global_id = get_global_id(0);

    a[global_id] = a[global_id] + b[global_id]


if I try to build this program, I get a crash at clBuildProgram().

If I remove the 3rd argument “__global int *c”, or if I change the statement
a[global_id] = a[global_id] + b[global_id]

to

    a[global_id] = a[global_id] + b[global_id] + c[global_id]

clBuildProgram() works fine.

My initial guess was that the build was reporting an unused variable, since I was not using “c”. However, it shouldn’t crash for something like that, and moreover, if the compiler sees that as an issue, I should be getting a build warning in the build log. The trouble is that I am not even able to get a build log, since the crash is fatal inside clBuildProgram().

Crash info-

clCreateProgramWithSource created successfully
building program
*** glibc detected *** ./a.out: free(): invalid next size (fast): 0x0805e8a8 ***
======= Backtrace: =========
======= Memory map: ========
08048000-0804a000 r-xp 00000000 03:08 1167003 /root/development/opencl/a.out
0804a000-0804b000 rw-p 00001000 03:08 1167003 /root/development/opencl/a.out
0804b000-08087000 rw-p 0804b000 00:00 0 [heap]
b6000000-b6021000 rw-p b6000000 00:00 0
b6021000-b6100000 ---p b6021000 00:00 0
b6191000-b6192000 rw-p b6191000 00:00 0
b6192000-b6292000 rw-s 113cd000 00:0d 9801 /dev/nvidia0
b6292000-b6392000 rw-s 0f5ca000 00:0d 9801 /dev/nvidia0
b6392000-b6492000 rw-s 112c6000 00:0d 9801 /dev/nvidia0
b6492000-b6592000 rw-s 1eefc000 00:0d 9801 /dev/nvidia0
b6592000-b6593000 rw-s 1eef9000 00:0d 9801 /dev/nvidia0
b6593000-b6594000 rw-s cfc04000 00:0d 9801 /dev/nvidia0
b6594000-b6595000 rw-s 1eef8000 00:0d 9801 /dev/nvidia0
b6595000-b6997000 rw-s 10de0000 00:0d 9801 /dev/nvidia0
b6997000-b6d99000 rw-s 0f459000 00:0d 9801 /dev/nvidia0
b6d99000-b6d9a000 rw-p b6d99000 00:00 0
b6d9a000-b6da4000 r-xp 00000000 03:08 1403618 /usr/lib/
b6da4000-b6da5000 rw-p 00009000 03:08 1403618 /usr/lib/
b6da5000-b6dc9000 r-xp 00000000 03:08 383594 /lib/
b6dc9000-b6dcb000 rw-p 00023000 03:08 383594 /lib/
b6dcb000-b6dcd000 r-xp 00000000 03:08 383593 /lib/
b6dcd000-b6dcf000 rw-p 00001000 03:08 383593 /lib/
b6dcf000-b6dd0000 rw-p b6dcf000 00:00 0
b6dd0000-b6de3000 r-xp 00000000 03:08 1403601 /usr/lib/
b6de3000-b6de4000 rw-p 00012000 03:08 1403601 /usr/lib/
b6de4000-b6df7000 r-xp 00000000 03:08 383604 /lib/
b6df7000-b6df9000 rw-p 00013000 03:08 383604 /lib/
b6df9000-b6dfb000 rw-p b6df9000 00:00 0
b6dfb000-b758d000 r-xp 00000000 03:08 875337 /usr/lib/
b758d000-b797f000 rw-p 00791000 03:08 875337 /usr/lib/
b797f000-b7988000 rw-p b797f000 00:00 0
b7988000-b7ace000 r-xp 00000000 03:08 383590 /lib/
b7ace000-b7acf000 r--p 00146000 03:08 383590 /lib/
b7acf000-b7ad1000 rw-p 00147000 03:08 383590 /lib/
b7ad1000-b7ad4000 rw-p b7ad1000 00:00 0
b7ad4000-b7ed3000 r-xp 00000000 03:08 875332 /usr/lib/
b7ed3000-b7ed8000 rw-p 003ff000 03:08 875332 /usr/lib/
b7ed8000-b7efe000 rw-p b7ed8000 00:00 0
b7f08000-b7f09000 rw-s cfc02000 00:0d 9801 /dev/nvidia0
b7f09000-b7f0a000 rw-s 10e1b000 00:0d 9801 /dev/nvidia0
b7f0a000-b7f1b000 rw-s 1ee5e000 00:0d 9801 /dev/nvidia0
b7f1b000-b7f1c000 r--s cf009000 00:0d 9801 /dev/nvidia0
b7f1c000-b7f1d000 rw-p b7f1c000 00:00 0
b7f1d000-b7f39000 r-xp 00000000 03:08 383632 /lib/
b7f39000-b7f3b000 rw-p 0001b000 03:08 383632 /lib/
bfcfb000-bfd0e000 rwxp bffeb000 00:00 0 [stack]
bfd0e000-bfd10000 rw-p bfffe000 00:00 0
ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]

You have a ‘;’ missing at the end of the next-to-last statement. But this is probably not the reason for your code to crash - I guess you are just not properly zero-terminating your kernel string after it is loaded from the file and before it is supplied to clCreateProgramWithSource().

Thanks for replying cgorac.

The ; is cut and paste error.

As for the zero-terminating, I checked and double checked for the zero-termination. It is fine. Also double checked with the string length and stuff

This is my current file

--------------------------------------------------------------- starts from the next line

/* Each work-item adds b[i] into a[i], where i is its global id in dimension 0; c is not read. */
__kernel void ptop(__global int *a, __global int *b, __global int *c)
{
    size_t global_id = get_global_id(0);

    a[global_id] = a[global_id] + b[global_id];
}


-----------------------------------------------------------------the previous line is a blank line, which I normally give for all my source files

I printed the source I retrieved from the file, which I then pass on to be built with clBuildProgram(), and it seems to be perfect, including the NUL termination.

Now I changed the source file for another extra newline

--------------------------------------------------------------- starts from the next line

/* Each work-item adds b[i] into a[i], where i is its global id in dimension 0; c is not read. */
__kernel void ptop(__global int *a, __global int *b, __global int *c)
{
    size_t global_id = get_global_id(0);

    a[global_id] = a[global_id] + b[global_id];
}


-----------------------------------------------------------------ends at the previous line

and all of a sudden the program builds fine. Now this surprises me, since this shouldn’t be a reason for it to fatally crash. The program source supplied is fine and is properly NUL-terminated (I checked that). Also, I have other program files that don’t have that extra blank line and they work fine. I am printing all my program sources before supplying them to clCreateProgramWithSource() and they show the right source.

Well, if I put your kernel code (any version you posted that is syntax-correct) say in file, and then if I use my standard test file (put say in foo.c):

#include <assert.h>

#include <stdio.h>

#include <stdlib.h>

#include <CL/cl.h>


/*
 * Build-time configuration. Every setting is guarded so that it can be
 * supplied on the compiler command line (e.g. -DKERNEL_NAME='"ptop"')
 * without triggering a macro redefinition that would silently win over it.
 */

/* Path of the OpenCL kernel source file handed to loadProgramSource(). */
#ifndef KERNEL_FILE
#define KERNEL_FILE ""
#endif

/* Name of the __kernel function passed to clCreateKernel(). */
#ifndef KERNEL_NAME
#define KERNEL_NAME "foo"
#endif

/* Device selection flag: defaults to 1 unless predefined, e.g. -DUSE_GPU=0. */
#ifndef USE_GPU
#define USE_GPU 1
#endif


/*
 * Read the whole file at `path` into a freshly malloc'ed, NUL-terminated
 * buffer and return it. The caller owns the buffer and must free() it.
 *
 * Failures (open, seek, allocation, short read) trip an assert, which is the
 * error-handling convention used throughout this test program.
 */
static char *
loadProgramSource(const char *path)
{
	/* Binary mode, so the size reported by ftell() is the byte count fread() returns. */
	FILE *file = fopen(path, "rb");
	assert(file != NULL);

	int rc = fseek(file, 0, SEEK_END);
	assert(rc == 0);
	long size = ftell(file);
	assert(size >= 0);
	rc = fseek(file, 0, SEEK_SET);
	assert(rc == 0);

	/* One byte more than the file: room for the terminating NUL written below. */
	char *source = malloc((size_t) size + 1);
	assert(source != NULL);

	/* The read is kept outside assert() so that it still happens with -DNDEBUG. */
	size_t nread = fread(source, sizeof(char), (size_t) size, file);
	assert(nread == (size_t) size);

	/* Terminate the string; assigning to `source` itself would discard the buffer. */
	source[size] = '\0';

	fclose(file);

	/* Silence "set but not used" warnings when asserts are compiled out. */
	(void) rc;
	(void) nread;

	return source;
}





	cl_int		  error;

	cl_device_id	devices;


	error = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &devices, NULL);


	error = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &devices, NULL);


	assert(error == CL_SUCCESS);

	cl_context	  context =

	clCreateContext(NULL, 1, &devices, NULL, NULL, &error);

	assert(error == CL_SUCCESS);

	cl_command_queue queue =

	clCreateCommandQueue(context, devices, 0, &error);

	assert(error == CL_SUCCESS);

	char		   *source = loadProgramSource(KERNEL_FILE);

	assert(source != NULL);

	cl_program	  program =

	clCreateProgramWithSource(context, 1, (const char **) &source,

				  NULL, &error);

	assert(error == CL_SUCCESS);

	error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

	assert(error == CL_SUCCESS);

	cl_kernel	   kernel = clCreateKernel(program, KERNEL_NAME, &error);

	assert(error == CL_SUCCESS);



	// do something here

	error = clReleaseKernel(kernel);

	assert(error == CL_SUCCESS);

	error = clReleaseProgram(program);

	assert(error == CL_SUCCESS);

	error = clReleaseCommandQueue(queue);

	assert(error == CL_SUCCESS);

	error = clReleaseContext(context);

	assert(error == CL_SUCCESS);

	return 0;


// Local Variables:

// c-basic-offset: 4

// End:

and if I compile:

gcc -DKERNEL_NAME='"ptop"' -o foo foo.c -lOpenCL

and then run, everything goes fine (my setup is 3.0-beta SDK, with corresponding driver, on a 64-bit Linux machine). So - I still suspect you made some error in your code up to the clBuildProgram() call, thus you may wish to try the above test program and see if it works for you.

Guess you were right. I was retrieving the size of the program from the file as size and instead of allocating size + 1 for the extra NULL, I was allocating just size. Thanks a lot cgorac.