My CUPTI-based profiler can't trace NVTX APIs

My code for tracing NVTX APIs is as follows:

else if (domain == CUPTI_CB_DOMAIN_NVTX)
{
    // Handles NVTX range callbacks: a range push starts (or continues) a
    // profiling session for the current context, a range pop ends it.
    //
    // NOTE(review): the CUPTI callback API documents the cbdata passed for
    // CUPTI_CB_DOMAIN_NVTX as CUpti_NvtxData, not CUpti_CallbackData. The casts
    // below are kept from the original code; confirm that `context` and
    // `callbackSite` are actually valid for this domain.
    if (callbackid == CUPTI_CBID_NVTX_nvtxRangePushA)
    {
        cout << "NVTX Range detected" << endl;
        CUpti_CallbackData const *pnvtxRangePushData = static_cast<CUpti_CallbackData const *>(pCallbackData);
        CUcontext ctx = pnvtxRangePushData->context;
        if (pnvtxRangePushData->callbackSite == CUPTI_API_ENTER)
        {
            // RAII guard: the mutex is released on every exit path.
            // Assumes user_data_mutex is a std::mutex-like object (so <mutex>
            // is already included by this file) — confirm.
            std::lock_guard<decltype(user_data_mutex)> lock(user_data_mutex);

            const bool known_ctx = user_data_map.count(ctx) != 0;
            CtxProfilerData &data = user_data_map[ctx];
            if (!known_ctx)
            {
                // First range seen on this context: store a fresh record
                // that remembers which context it belongs to.
                CtxProfilerData user_data;
                user_data.ctx = ctx;
                data = user_data;
            }
            else if (data.cur_ranges == data.max_num_ranges)
            {
                // The running session is full: close it so a new one is
                // started just below.
                EndSession(data);
                data.cur_ranges = 0;
            }

            if (!known_ctx || data.cur_ranges == 0)
            {
                InitializeContext(data);
                StartSession(data);
            }
            data.cur_ranges++;
        }
    }
    if (callbackid == CUPTI_CBID_NVTX_nvtxRangePop)
    {
        cout << "NVTX Range pop detected" << endl;
        CUpti_CallbackData const *pnvtxRangePopData = static_cast<CUpti_CallbackData const *>(pCallbackData);
        CUcontext ctx = pnvtxRangePopData->context;
        if (pnvtxRangePopData->callbackSite == CUPTI_API_EXIT)
        {
            std::lock_guard<decltype(user_data_mutex)> lock(user_data_mutex);
            // Only end a session for a context that a push registered.
            // Using operator[] here would silently insert a default record
            // (with no ctx set) and call EndSession on it.
            if (user_data_map.count(ctx))
            {
                CtxProfilerData &data = user_data_map[ctx];
                EndSession(data);
                data.cur_ranges = 0;
            }
        }
    }
}

And I have definitely registered these callbacks:

// Subscribes ProfilerCallbackHandler to CUPTI and enables the individual
// callbacks the profiler reacts to, then registers EndExecution to run at
// process exit.
//
// NOTE: per the CUPTI documentation, NVTX callbacks are only delivered when
// the NVTX injection environment variable (NVTX_INJECTION64_PATH, or
// NVTX_INJECTION32_PATH for 32-bit processes) points to the CUPTI library
// before the application's first NVTX call; enabling the callbacks here is
// not sufficient on its own.
void RegisterCallbacks()
{
// The handle is local to this function, so it is not available afterwards
// from here (e.g. for unsubscribing).
CUpti_SubscriberHandle subscriber;
// NULL is the userdata argument handed to the callback.
CUPTI_API_CALL(cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)ProfilerCallbackHandler, NULL));
// First argument 1 = enable the callback for the given domain / callback id.
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_DRIVER_API, CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel));
CUPTI_API_CALL(cuptiEnableCallback(1,subscriber,CUPTI_CB_DOMAIN_DRIVER_API,CUPTI_DRIVER_TRACE_CBID_cuMemAlloc));
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE, CUPTI_CBID_RESOURCE_CONTEXT_CREATED));
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API, CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020));
// NVTX range push/pop callbacks.
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_NVTX,CUPTI_CBID_NVTX_nvtxRangePushA));
CUPTI_API_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_NVTX,CUPTI_CBID_NVTX_nvtxRangePop));

// Run EndExecution when the process exits normally.
atexit(EndExecution);

}

My CUDA kernel-launch code looks like this:

// Wrap the kernel launches in an NVTX range named "kernel_launch".
nvtxRangePushA("kernel_launch");
LaunchKernelsMultiStreams(device_map[0]);
nvtxRangePop();

However, the output does not contain any messages about the NVTX APIs, which means my profiler failed to trace them.
But when I use nsys, the range covered by my NVTX calls does appear, which means NVTX takes effect in my application. So what is the problem?

Please review section 2, "Usage", of the CUPTI 12.9 documentation.

If correctly setting up the NVTX environment variable does not work, please specify the steps you are performing, the version of the NVTX library in use, and a minimal reproducible example, including the test application if you are writing an injection library rather than a single application.

Thanks, this problem was successfully fixed using your suggestion.