I am trying to trace 3 counters in a CUDA program as follows:
// Sets up CUPTI event collection for three events (visible portion of the
// routine only; the rest of the body is not shown here).
//
// Steps performed below, in order:
//   1. switch `context` to continuous event collection mode,
//   2. create an event group on `context`,
//   3. look up three event IDs on `device` by name,
//   4. add each event ID to the group,
//   5. enable the group.
//
// `context`, `device`, `gld_request`, `l1_gld_hit`, `l1_gld_miss` and
// CHECK_CUPTI_ERROR are defined outside this fragment.
// NOTE(review): the void *(void *) signature matches a pthread start routine,
// so this is presumably run as a sampling thread -- confirm against the caller.
// `arg` is not used in the visible portion.
void *
sampling_func(void *arg)
{
CUptiResult cuptiErr;
CUpti_EventGroup eventGroup;
// One ID per event that is looked up by name below.
CUpti_EventID gldrID, gldhitID, gldmissID; //eventId;
// bytesRead / eventVal are not referenced in the visible portion; presumably
// they are used when the counters are read later -- TODO confirm.
size_t bytesRead;
uint64_t eventVal;
// Select continuous collection mode for `context` before creating the group.
cuptiErr = cuptiSetEventCollectionMode(context,
CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiSetEventCollectionMode");
// Create the (initially empty) event group that all three events go into.
cuptiErr = cuptiEventGroupCreate(context, &eventGroup, 0);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupCreate");
// Resolve event names to IDs. The name arguments are identifiers defined
// elsewhere; the trailing comments record the event-name strings they are
// expected to hold.
cuptiErr = cuptiEventGetIdFromName(device, gld_request, &gldrID); //"gld_request"
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetIdFromName");
cuptiErr = cuptiEventGetIdFromName(device, l1_gld_hit, &gldhitID); //"l1_global_load_hit"
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetIdFromName");
cuptiErr = cuptiEventGetIdFromName(device, l1_gld_miss, &gldmissID); //"l1_global_load_miss"
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetIdFromName");
// Add the events to the same group one at a time. Every add is checked
// separately, but all three checks pass the same function-name string, so a
// reported failure does not by itself say which event was rejected.
cuptiErr = cuptiEventGroupAddEvent(eventGroup, gldrID);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupAddEvent");
cuptiErr = cuptiEventGroupAddEvent(eventGroup, gldhitID);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupAddEvent");
cuptiErr = cuptiEventGroupAddEvent(eventGroup, gldmissID);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupAddEvent");
// Enable the group once all events have been added.
cuptiErr = cuptiEventGroupEnable(eventGroup);
CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupEnable");
However, at run time I get the following error:
Error CUPTI_ERROR_INVALID_EVENT_ID for CUPTI API function 'cuptiEventGroupAddEvent'.
This points to the second call to cuptiEventGroupAddEvent, the one for gldhitID. I tried various combinations, and it seems I cannot add more than one counter to the group at the same time, which I believe should not be the case. Each of these counters works fine when added on its own.
I am using a device with CUDA compute capability 2.0.
Any suggestions? What am I doing wrong? I am using the CUPTI sample code as a reference.