Strange performance issue

Hi everyone. I have written two "Hello World" programs, one in plain C++ and one in C# using the Cloo library, both running the same kernel code. The strange thing is that the Cloo version completes faster, despite its considerable overhead.

C# Cloo - 13 seconds

watch.Start();

	// Enqueue the same kernel (repeats * repeatmultiplier) times; every call
	// is handed the shared eventList that the wait loop below polls.
	for (int launch = 0; launch < (repeats * repeatmultiplier); launch++)
	{
		queue.Execute(kernelVectorSum, null, new long[] { VECTOR_SIZE }, null, eventList);
	}

	// Wait by polling: sleep first, then check whether every event reports
	// Complete. The status is only inspected once per 1200 ms, so the time
	// printed below is only as precise as that polling step.
	do
	{
		Thread.Sleep(1200);
	}
	while (!eventList.All(o => o.Status == ComputeCommandExecutionStatus.Complete));

	long elapsedMs = watch.ElapsedMilliseconds;
	Console.WriteLine("GPU time:" + elapsedMs);

C++ - 17 seconds

size_t globalWorkSize[] = { VECTOR_SIZE };

	cl_event queueEvents[repeats * repeatmultiplier];

	startWatch();

	for (int ii = 0; ii < (repeats * repeatmultiplier); ii++)

	{

		status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, &queueEvents[ii]);

		if(status!=CL_SUCCESS) die("Start queue yok");

	}

	if(clWaitForEvents(repeats * repeatmultiplier, queueEvents)!=CL_SUCCESS) die("Queue yok");

	endWatch("GPU time");