Losing part of the output buffer


I have this simple kernel

__kernel void simAnnealing(__global int *in,

						   __global int *dist,

						   __global int *randomFeed,

						   __global int *out,

						   __local int8 *schedule,

						   __local int8 *sharedSched,

						   __local int *distances,

						   const int numbOfTeams,

						   __global float *costResult){


	int numbOfRounds = (2 * numbOfTeams) - 2;

	int index = 0;

	for(int i = 0; i < numbOfRounds; i++){

		for(int j = 0; j < numbOfTeams; j++){

			index = (numbOfTeams * i) + j;

			out[(numbOfTeams * numbOfRounds) + index] = numbOfRounds;




Don’t mind the other arguments. My ATI implementation is bigger and works fine.

My NVIDIA implementation, however, does not. When I try to read the output buffer, only the first columns and rows of my array are filled with values.

I define my arguments

// Bind the nine kernel arguments in the order of the simAnnealing signature.
// Args 0-3: buffer objects for the kernel's __global int* parameters (in, dist, randomFeed, out).
oclHandles.kernel.setArg(0, oclBuffers.inCL);										// Add the kernel arguments

		oclHandles.kernel.setArg(1, oclBuffers.distCL);

		oclHandles.kernel.setArg(2, oclBuffers.randomCL);

		oclHandles.kernel.setArg(3, oclBuffers.outCL);

		// Args 4-6: the kernel's __local parameters; a byte size plus a NULL pointer is passed instead of a buffer.
		// NOTE(review): the kernel declares args 4 and 5 as `__local int8 *`, but the sizes below are
		// computed with sizeof(cl_int) -- confirm the intended element type and byte count.
		oclHandles.kernel.setArg(4, sizeof(cl_int) * params.teams * params.rounds, NULL);

		oclHandles.kernel.setArg(5, sizeof(cl_int) * params.teams * params.rounds, NULL);

		// Arg 6: `__local int *distances`, sized as teams * teams ints.
		oclHandles.kernel.setArg(6, sizeof(cl_int) * params.teams * params.teams, NULL);

		// Arg 7: the scalar `numbOfTeams`, passed by value.
		oclHandles.kernel.setArg(7, params.teams);

		// Arg 8: buffer object for the kernel's `__global float *costResult`.
		oclHandles.kernel.setArg(8, oclBuffers.resultCL);

I copy from cl_mem to a readable buffer

// Blocking read (CL_TRUE) of the device output buffer into the host output buffer, starting at byte offset 0.
// The size argument is a byte count, not an element count, so the number of ints
// (teams * rounds) must be scaled by sizeof(cl_int); passing the bare element count
// copies only the first quarter of the values and leaves the rest of the host array unfilled.
// NOTE(review): the kernel as posted writes starting at element offset teams * rounds of `out`;
// confirm that byte offset 0 is the region that should be read back.
err = oclHandles.queue.enqueueReadBuffer(oclBuffers.outCL, CL_TRUE, 0, sizeof(cl_int) * params.teams * params.rounds, hostBuffers.outH);

I read from buffer

// from 1D array to 2D array

	for(int i = 0; i < params.teams; i++){

		for(int j = 0; j < params.rounds; j++){

			int index =  ((params.teams * j) + i);

			outputSched[i][j] = hostBuffers.outH[index];




Does anyone have an idea?

Thanks in advance.