I am using an NVIDIA GeForce GTX 1660 SUPER with the CUDA 11.4.2 toolkit
(installer cuda_11.4.2_471_41_win10.exe), and I am trying to profile an OpenCL program like this:
// Profile one kernel launch: create a profiling-enabled command queue,
// enqueue the kernel, wait for completion, then read the event's execution
// status and its CL_PROFILING_COMMAND_QUEUED timestamp.
cl_int _err;
const cl_queue_properties _queue_properties = CL_QUEUE_PROFILING_ENABLE;
// Pass the property bitfield BY VALUE. A properties *pointer* is interpreted
// (as by clCreateCommandQueueWithProperties) as a zero-terminated list of
// {name, value} pairs, i.e. { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 };
// a pointer to a lone CL_QUEUE_PROFILING_ENABLE is not such a list, so
// profiling is not enabled and later profiling queries fail with
// CL_PROFILING_INFO_NOT_AVAILABLE (-7).
// NOTE(review): this diagnosis assumes the pointer overload in the header in
// use forwards the list unchanged -- confirm against your CL header.
cl::CommandQueue _queue(FContext, FDevice, _queue_properties, &_err);
// NOTE(review): check _err here -- it is overwritten by the enqueue call
// below, so a queue-creation failure would otherwise go unnoticed.
cl::NDRange _offset(0);
cl::NDRange _global_size(1000);
cl::Event _event;
_err = _queue.enqueueNDRangeKernel(FKernel,
                                   _offset,
                                   _global_size,
                                   cl::NullRange,  // let the runtime choose the work-group size
                                   nullptr,        // no events to wait on
                                   &_event);
// Profiling data is only available once the command has completed.
_event.wait();
const auto _status = _event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>(&_err);
// The returned value is meaningful only when _err == CL_SUCCESS; on failure
// it should be ignored.
const auto _queued = _event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>(&_err);
`_status` comes back as 0x0 (CL_COMPLETE), but the last line sets `_err` to -7
(CL_PROFILING_INFO_NOT_AVAILABLE).
The value returned by the getProfilingInfo call is a small number such as 4 or 5,
and it stays the same on all subsequent calls.
What am I doing wrong?