I hope to find a way to launch a graph from the HOST node.
According to the Runtime API documentation, I know I can’t directly call the CUDA function in the callback of the HOST node. So that I can’t directly call cudaLaunch()
to launch an executable graph in the HOST node either.
After consideration, I think I can create a child thread in the HOST node, and call cudaLaunch()
in the child thread. I suppose the child thread maybe has nothing to do with the graph so this way should work. I wrote the code but the program halts at the cudaStreamSynchronize(stream)
in the child thread invoked from the HOST node and it doesn’t show any errors.
I know maybe creating a child thread is not a good solution but it is the only one I figured out after several hours.
I am using CUDA11.4 which doesn’t support conditional node type(supported by CUDA12.3.1).
I hope to add some control flow like if
statement in the HOST node to determine whether to launch the graph. So the child node type doesn’t meet my needs.
I hope to figure out why the program halts at the cudaStreamSynchronize(stream)
and to achieve graph launches from the HOST node. My code is as below:
#include<stdio.h>
#include<pthread.h>
#define CUDA_CHECK_ERROR(f, msg)\
{\
cudaError_t error = f;\
if (error != cudaSuccess) {\
printf("File %s Line %d occurs error, error msg : %s, error code : %d, error name : %s, user msg : %s\n", __FILE__, __LINE__, cudaGetErrorString(error), error, cudaGetErrorName(error), msg);\
}\
}
// called by child thread.
void* launchGraph(void* userData) {
cudaGraphExec_t graphExec = *(cudaGraphExec_t*)userData;
cudaStream_t stream;
CUDA_CHECK_ERROR(cudaStreamCreate(&stream), "call cudaStreamCreate failed.");
printf("after cudaStreamCreate.\n");
CUDA_CHECK_ERROR(cudaGraphLaunch(graphExec, stream), "call cudaGraphLaunch failed.");
printf("after cudaGraphLaunch.\n");
//************The program halts here and can't proceed************
//************The program halts here and can't proceed************
CUDA_CHECK_ERROR(cudaStreamSynchronize(stream), "call cudaStreamSynchronize failed."); printf("after cudaStreamSynchronize.\n");
cudaStreamDestroy(stream);
return nullptr;
}
// the callback function of the HOST node
void hostNodeCallBack(void* userData) {
// create a thread to launch the child executable graph
pthread_t thread;
pthread_create(&thread, NULL, launchGraph, userData);
pthread_join(thread, nullptr);
}
int main() {
// 1. create a child graph
cudaGraph_t childGraph;
cudaGraphExec_t childGraphExec;
cudaGraphCreate(&childGraph, 0);
cudaGraphNode_t node;
cudaGraphAddEmptyNode(&node, childGraph, NULL, 0);
cudaGraphInstantiate(&childGraphExec, childGraph, nullptr, nullptr, 0);
// 2. create a parent graph containing a host node
cudaGraph_t parentGraph;
cudaGraphExec_t parentGraphExec;
cudaGraphCreate(&parentGraph, 0);
// 2.1 add a host node to parent graph
cudaHostNodeParams params;
params.fn = hostNodeCallBack;
params.userData = &childGraphExec; // pass the child executable graph as argument
cudaGraphAddHostNode(&node, parentGraph, NULL, 0, ¶ms);
// 2.2 launch parent graph
cudaGraphInstantiate(&parentGraphExec, parentGraph, nullptr, nullptr, 0);
cudaGraphLaunch(parentGraphExec, cudaStreamDefault);
cudaStreamSynchronize(cudaStreamDefault);
// release resources
return 0;
}
Any help will be appreciated.
Jack