Hello
I have this simple kernel
/*
 * simAnnealing - fills one schedule-sized slice of `out` with the round count.
 *
 * With rounds = 2 * numbOfTeams - 2, the kernel stores `rounds` into every
 * element of out[teams * rounds .. 2 * teams * rounds - 1], i.e. the slice
 * that starts one full teams * rounds block past the beginning of `out`.
 * `out` therefore has to hold at least 2 * teams * rounds ints.
 *
 * Only `out` and `numbOfTeams` are used here; all other arguments are left
 * untouched. No work-item ID is queried, so every work-item that executes the
 * kernel performs the same complete set of writes.
 *
 * NOTE(review): the writes begin at element offset teams * rounds, not 0 --
 * confirm that the host allocates and reads back that region.
 */
__kernel void simAnnealing(__global int *in,
                           __global int *dist,
                           __global int *randomFeed,
                           __global int *out,
                           __local int8 *schedule,
                           __local int8 *sharedSched,
                           __local int *distances,
                           const int numbOfTeams,
                           __global float *costResult)
{
    const int roundCount = (2 * numbOfTeams) - 2;
    /* First element of the slice being written. */
    const int sliceStart = numbOfTeams * roundCount;

    for (int round = 0; round < roundCount; ++round) {
        /* Each round occupies numbOfTeams consecutive elements. */
        const int rowStart = sliceStart + (numbOfTeams * round);
        for (int team = 0; team < numbOfTeams; ++team) {
            out[rowStart + team] = roundCount;
        }
    }
}
Don't mind the other arguments. My ATI implementation is bigger and works fine.
My NVIDIA implementation, however, does not. When I try to read the output buffer, only the first columns and rows of my array are filled with values.
I define my arguments
oclHandles.kernel.setArg(0, oclBuffers.inCL); //Argumenten toevoegen
oclHandles.kernel.setArg(1, oclBuffers.distCL);
oclHandles.kernel.setArg(2, oclBuffers.randomCL);
oclHandles.kernel.setArg(3, oclBuffers.outCL);
oclHandles.kernel.setArg(4, sizeof(cl_int) * params.teams * params.rounds, NULL);
oclHandles.kernel.setArg(5, sizeof(cl_int) * params.teams * params.rounds, NULL);
oclHandles.kernel.setArg(6, sizeof(cl_int) * params.teams * params.teams, NULL);
oclHandles.kernel.setArg(7, params.teams);
oclHandles.kernel.setArg(8, oclBuffers.resultCL);
I copy from cl_mem to a readable buffer
// Copy the CL output buffer into the host output buffer (blocking read).
// The size argument of enqueueReadBuffer is a BYTE count, not an element
// count: the teams * rounds elements are cl_int, so scale by sizeof(cl_int).
// Passing the bare element count transfers only a fraction of the array and
// leaves the rest of the host buffer unfilled.
// NOTE(review): assumes hostBuffers.outH can hold at least
// teams * rounds cl_int elements -- confirm against its allocation.
err = oclHandles.queue.enqueueReadBuffer(oclBuffers.outCL, CL_TRUE, 0, sizeof(cl_int) * params.teams * params.rounds, hostBuffers.outH);
I read from buffer
//van 1D array naar 2D arry
for(int i = 0; i < params.teams; i++){
for(int j = 0; j < params.rounds; j++){
int index = ((params.teams * j) + i);
outputSched[i][j] = hostBuffers.outH[index];
}
}
}
Does anyone have an idea?
Thanks in advance.