Hi everyone. I have written two "Hello World" programs, one in bare C++ and one in C# with the Cloo library, both using the same kernel code. The strange thing is that the Cloo version completes faster, despite its considerable extra overhead.
C# Cloo - 13 seconds
// Benchmark: enqueue the kernel (repeats * repeatmultiplier) times and report the
// wall-clock time, in milliseconds, until the device has finished all of them.
// The global work size is the same for every launch, so it is built once up front.
long[] globalWorkSize = { VECTOR_SIZE };

watch.Start();
for (int ii = 0; ii < (repeats * repeatmultiplier); ii++)
{
    // NOTE(review): eventList is passed to every launch; confirm against the Cloo
    // docs whether the collection also acts as a wait list for the new command.
    queue.Execute(kernelVectorSum, null, globalWorkSize, null, eventList);
}

// Block until every command enqueued above has completed. Waiting on the queue
// itself (instead of polling event statuses between sleeps) keeps the measurement
// from being rounded up to the polling interval, and it cannot spin forever when
// a command ends in an error state and never reports Complete.
queue.Finish();
watch.Stop();
Console.WriteLine("GPU time:" + watch.ElapsedMilliseconds);
C++ - 17 seconds
size_t globalWorkSize[] = { VECTOR_SIZE };
cl_event queueEvents[repeats * repeatmultiplier];
startWatch();
for (int ii = 0; ii < (repeats * repeatmultiplier); ii++)
{
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, &queueEvents[ii]);
if(status!=CL_SUCCESS) die("Start queue yok");
}
if(clWaitForEvents(repeats * repeatmultiplier, queueEvents)!=CL_SUCCESS) die("Queue yok");
endWatch("GPU time");