Hello, I have recently been studying the "Device Memory L2 Access Management" section of the NVIDIA CUDA documentation.
I am now trying to implement the examples from the NVIDIA documentation in Python.
However, declaring a CUDA graph node ("cudaGraphNode_t node") has become an obstacle.
To be more precise, I want to convert the code below into Python.
// Sets an L2 access-policy window on the first node of a CUDA graph and then
// instantiates the graph.
// NOTE(review): `graph` is never created or captured in this snippet; it is
// presumably populated earlier (e.g. via stream capture) -- confirm. `stream`
// is declared but not used in the visible code.
cudaGraph_t graph;
cudaGraphExec_t graphExec;
cudaStream_t stream;
size_t numNodes = 0;
cudaGraphNode_t* nodes = nullptr;
cudaGraphNode_t node;

// Passing a null array asks the runtime only for the node count.
CUDA_CHECK(cudaGraphGetNodes(graph, nullptr, &numNodes));
// An empty graph has no nodes[0]; report it through the same error path
// instead of reading past the end of a zero-length array.
CUDA_CHECK(numNodes > 0 ? cudaSuccess : cudaErrorInvalidValue);

nodes = new cudaGraphNode_t[numNodes];
CUDA_CHECK(cudaGraphGetNodes(graph, nodes, &numNodes));
node = nodes[0];
// The node handle has been copied out, so the temporary array can be released.
delete[] nodes;
nodes = nullptr;

// Describe the access-policy window over d_data.
// NOTE(review): nodes[0] is assumed to be a kernel node; if it is not, the
// cudaGraphKernelNodeSetAttribute call below is expected to fail -- confirm.
cudaKernelNodeAttrValue node_attribute;
node_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(d_data);
node_attribute.accessPolicyWindow.num_bytes = total_size;
node_attribute.accessPolicyWindow.hitRatio = hitRatio;
node_attribute.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting;
node_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;
CUDA_CHECK(cudaGraphKernelNodeSetAttribute(node, cudaKernelNodeAttributeAccessPolicyWindow, &node_attribute));

// Instantiate only after the attribute has been set on the graph node.
CUDA_CHECK(cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
Can anyone help me convert the code above into Python?
Thank you in advance.