Hi,
I am trying to use CUDA Graphs (stream capture) to capture my baseline implementation, which calls cuSPARSE as shown below:
CHECK_ERROR(cudaStreamCreate(&stream));
CHECK_ERROR(cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal));
for (int i = 0; i < 10000; i++) {
CHECK_CUSPARSE( cusparseCreate(&handle) );
CHECK_CUSPARSE( cusparseCreateCsr(&matA, A_num_rows, A_num_cols, A_nnz,
A_rowoff, A_colidx, A_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F) );
....
CHECK_CUSPARSE( cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, matA, vecX, &beta, vecY, CUDA_R_32F,
CUSPARSE_SPMV_ALG_DEFAULT, dBuffer) );
}
CHECK_ERROR( cudaStreamEndCapture(stream, &graph) );
I found that if I call cusparseCreate() inside the loop (i.e., while the stream is being captured), the CUDA graph capture fails with the following error:
Capturing CUDA kernel...
** On entry to cusparseCreate(): CUDA context cannot be initialized
Is it not possible to call cusparseCreate() inside the loop if I want to use CUDA Graphs?
If it is possible, how should I modify my code?