I’m working on a custom implementation using cuDNN’s Graph API, but I’m encountering an error when creating the engineConfig. I’m trying to implement this directly using cuDNN’s backend API calls, without using the cudnn_frontend library.
Here’s the problematic part of my code:
#include <stdio.h>
#include <stdlib.h>

#include <functional>
#include <iostream>
#include <string>

#include <cudnn.h>
// Abort the program if a backend descriptor handle is NULL (i.e. creation failed).
// NOTE(review): the function's name is misleading — it asserts the descriptor is
// NOT null. It is kept for caller compatibility, but the error message is fixed:
// the original printed "descriptor is not NULL" in exactly the case where the
// descriptor IS null.
void assertDescriptorIsNull(cudnnBackendDescriptor_t desc) {
    if (desc == NULL) {
        fprintf(stderr, "Error: descriptor is NULL\n");
        exit(-1);
    }
}
// Create and finalize a CUDNN_BACKEND_TENSOR_DESCRIPTOR.
//
// numDim        number of dimensions (entries in dim/stride)
// dim           tensor dimensions
// stride        element strides per dimension
// byteAlignment required byte alignment of the tensor's device pointer
// dataType      element data type (e.g. CUDNN_DATA_FLOAT)
// name          human-readable tag; hashed into the tensor's unique id
//
// Returns the finalized descriptor; exits via CHECK_CUDNN on any cuDNN error.
cudnnBackendDescriptor_t tensorDescriptorCreate(
    int64_t numDim,
    int64_t *dim,
    int64_t *stride,
    int64_t byteAlignment,
    cudnnDataType_t dataType,
    std::string name
) {
    cudnnBackendDescriptor_t tensorDesc;
    CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_TENSOR_DESCRIPTOR, &tensorDesc));
    assertDescriptorIsNull(tensorDesc);

    CHECK_CUDNN(cudnnBackendSetAttribute(tensorDesc,
        CUDNN_ATTR_TENSOR_DATA_TYPE, CUDNN_TYPE_DATA_TYPE, 1, &dataType));
    CHECK_CUDNN(cudnnBackendSetAttribute(tensorDesc,
        CUDNN_ATTR_TENSOR_DIMENSIONS, CUDNN_TYPE_INT64, numDim, dim));
    CHECK_CUDNN(cudnnBackendSetAttribute(tensorDesc,
        CUDNN_ATTR_TENSOR_STRIDES, CUDNN_TYPE_INT64, numDim, stride));
    CHECK_CUDNN(cudnnBackendSetAttribute(tensorDesc,
        CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT, CUDNN_TYPE_INT64, 1, &byteAlignment));

    // BUG FIX: CUDNN_ATTR_TENSOR_UNIQUE_ID has type CUDNN_TYPE_INT64, but the
    // original code passed the char* returned by name.c_str() as the attribute
    // buffer, so cuDNN interpreted the first string bytes as the id. Derive a
    // stable 64-bit id from the name instead. (Ids must be unique per tensor
    // within one graph; distinct names here hash to distinct ids in practice —
    // a collision would be rejected by cuDNN at op-graph finalize.)
    int64_t uid = static_cast<int64_t>(std::hash<std::string>{}(name));
    CHECK_CUDNN(cudnnBackendSetAttribute(tensorDesc,
        CUDNN_ATTR_TENSOR_UNIQUE_ID, CUDNN_TYPE_INT64, 1, &uid));

    CHECK_CUDNN(cudnnBackendFinalize(tensorDesc));
    return tensorDesc;
}
// Create an (unfinalized) operation-graph descriptor bound to the given cuDNN
// handle. Ops must be attached via CUDNN_ATTR_OPERATIONGRAPH_OPS before the
// graph is finalized.
cudnnBackendDescriptor_t init_graph(cudnnHandle_t cudnn) {
    cudnnBackendDescriptor_t opGraph;
    CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR, &opGraph));
    CHECK_CUDNN(cudnnBackendSetAttribute(opGraph,
        CUDNN_ATTR_OPERATIONGRAPH_HANDLE,
        CUDNN_TYPE_HANDLE,
        1,
        &cudnn));
    return opGraph;
}
// Finalize the operation graph. Call only after all operations have been
// attached via CUDNN_ATTR_OPERATIONGRAPH_OPS; attributes cannot be changed
// after finalization.
void finalize_graph(cudnnBackendDescriptor_t graph) {
CHECK_CUDNN(cudnnBackendFinalize(graph));
}
// Create and finalize an engine for a finalized operation graph.
//
// graph        finalized CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR
// globalIndex  which engine implementation to select (default 0, preserving
//              the original hard-coded behavior). Not every index supports
//              every graph/GPU combination: a CUDNN_STATUS_NOT_SUPPORTED
//              during this finalize — or later when finalizing the engine
//              config built from this engine — usually means the chosen index
//              cannot execute the graph; try other indices or use the
//              heuristics descriptor (CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR)
//              to enumerate supported configs.
cudnnBackendDescriptor_t create_engine_by_graph(cudnnBackendDescriptor_t graph,
                                                int64_t globalIndex = 0) {
    cudnnBackendDescriptor_t engine;
    CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINE_DESCRIPTOR, &engine));
    CHECK_CUDNN(cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_OPERATION_GRAPH,
        CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &graph));
    CHECK_CUDNN(cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_GLOBAL_INDEX,
        CUDNN_TYPE_INT64, 1, &globalIndex));
    CHECK_CUDNN(cudnnBackendFinalize(engine));
    return engine;
}
// A finalized engine-config descriptor together with the workspace it needs.
struct EngineConfig {
cudnnBackendDescriptor_t engcfg; // finalized CUDNN_BACKEND_ENGINECFG_DESCRIPTOR
int64_t workspaceSize; // required device workspace in bytes, queried after finalize
};
// Wrap a finalized engine in an engine-config descriptor, finalize it, and
// query its workspace requirement.
//
// NOTE(review): if cudnnBackendFinalize fails here with
// CUDNN_STATUS_NOT_SUPPORTED, the selected engine cannot execute the graph on
// this GPU — choose a different CUDNN_ATTR_ENGINE_GLOBAL_INDEX or enumerate
// configs via CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR instead of picking index 0.
struct EngineConfig engineConfigDescriptorCreate(cudnnBackendDescriptor_t engine) {
    cudnnBackendDescriptor_t engcfg;
    CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, &engcfg));
    CHECK_CUDNN(cudnnBackendSetAttribute(engcfg, CUDNN_ATTR_ENGINECFG_ENGINE,
        CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &engine));
    CHECK_CUDNN(cudnnBackendFinalize(engcfg));

    // FIX: workspaceSize was uninitialized; zero-init it and pass a real
    // element-count out-parameter rather than NULL so we can see how many
    // elements the query actually wrote.
    int64_t workspaceSize = 0;
    int64_t elementCount = 0;
    CHECK_CUDNN(cudnnBackendGetAttribute(engcfg, CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE,
        CUDNN_TYPE_INT64, 1, &elementCount, &workspaceSize));

    struct EngineConfig config = {engcfg, workspaceSize};
    return config;
}
// Builds a batch-norm forward operation (one op) and registers it on an
// operation graph. Intended call order:
//   NormConfig cfg(handle, graph);
//   cfg.CreateNormDesc(...);   // tensors + finalized norm op
//   cfg.register_graph();      // attach op to graph (before graph finalize)
//
// NOTE(review): this class does not destroy its descriptors; they live for
// the process lifetime. Add a destructor calling cudnnBackendDestroyDescriptor
// if instances are created repeatedly.
class NormConfig {
private:
    cudnnHandle_t cudnn;                // kept for future use; not read yet
    cudnnBackendDescriptor_t norm_desc; // CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR
    // FIX: the original also declared `mode` and `phase` members of type
    // cudnnBackendDescriptor_t — wrong type, never used, and shadowed by the
    // same-named parameters of CreateNormDesc. They have been removed.
    cudnnBackendDescriptor_t x_desc;
    cudnnBackendDescriptor_t y_desc;
    cudnnBackendDescriptor_t mean_desc;
    cudnnBackendDescriptor_t inv_var_desc;
    cudnnBackendDescriptor_t scale_desc;
    cudnnBackendDescriptor_t bias_desc;
    cudnnBackendDescriptor_t epsilon_desc;
    cudnnBackendDescriptor_t input_running_mean_desc;
    cudnnBackendDescriptor_t input_running_var_desc;
    cudnnBackendDescriptor_t output_running_mean_desc;
    cudnnBackendDescriptor_t output_running_var_desc;
    cudnnBackendDescriptor_t op_graph;  // graph the norm op is registered on

    // Checked wrapper around cudnnBackendSetAttribute.
    void setAttribute(cudnnBackendDescriptor_t desc, cudnnBackendAttributeName_t attr, cudnnBackendAttributeType_t type, int64_t num, void *value) {
        CHECK_CUDNN(cudnnBackendSetAttribute(desc, attr, type, num, value));
    }

    // Attach a single tensor-descriptor attribute to `desc`.
    void setTensorAttribute(cudnnBackendDescriptor_t desc, cudnnBackendAttributeName_t attr, cudnnBackendDescriptor_t tensor_desc) {
        this->setAttribute(desc, attr, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &tensor_desc);
    }

public:
    NormConfig(cudnnHandle_t cudnn_, cudnnBackendDescriptor_t graph) : cudnn(cudnn_), op_graph(graph) {}

    // Create every tensor descriptor needed by a batch-norm forward op on an
    // NCHW float tensor of shape (batch_size, channels, height, width), wire
    // them into norm_desc, and finalize the op.
    void CreateNormDesc(
        int64_t batch_size,
        int64_t channels,
        int64_t height,
        int64_t width,
        cudnnBackendNormMode_t mode,
        cudnnBackendNormFwdPhase_t phase
    ) {
        CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR, &this->norm_desc));
        setAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_MODE, CUDNN_TYPE_NORM_MODE, 1, &mode);
        setAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_PHASE, CUDNN_TYPE_NORM_FWD_PHASE, 1, &phase);

        // Packed NCHW layout for x/y; per-channel (1,C,1,1) for the stats.
        int64_t dims[4] = {batch_size, channels, height, width};
        int64_t strides[4] = {channels * height * width, height * width, width, 1};
        int64_t scalar[4] = {1, 1, 1, 1};
        int64_t dim2d[4] = {1, channels, 1, 1};
        int64_t dim2d_stride[4] = {channels, 1, channels, channels};

        // NOTE(review): byte alignment of 4 is the minimum for float; cuDNN
        // samples typically declare 16 — confirm against your allocations.
        this->x_desc = tensorDescriptorCreate(4, dims, strides, 4, CUDNN_DATA_FLOAT, std::string("x"));
        this->y_desc = tensorDescriptorCreate(4, dims, strides, 4, CUDNN_DATA_FLOAT, std::string("y"));
        this->mean_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("mean"));
        this->inv_var_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("inv_var"));
        this->scale_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("scale"));
        this->bias_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("bias"));
        this->epsilon_desc = tensorDescriptorCreate(4, scalar, scalar, 4, CUDNN_DATA_FLOAT, std::string("epsilon"));
        this->input_running_mean_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("input_running_mean"));
        this->input_running_var_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("input_running_var"));
        this->output_running_mean_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("output_running_mean"));
        this->output_running_var_desc = tensorDescriptorCreate(4, dim2d, dim2d_stride, 4, CUDNN_DATA_FLOAT, std::string("output_running_var"));

        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_XDESC, this->x_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC, this->mean_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC, this->inv_var_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC, this->scale_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC, this->bias_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC, this->epsilon_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC, this->input_running_mean_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC, this->input_running_var_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC, this->output_running_mean_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC, this->output_running_var_desc);
        setTensorAttribute(this->norm_desc, CUDNN_ATTR_OPERATION_NORM_FWD_YDESC, this->y_desc);

        CHECK_CUDNN(cudnnBackendFinalize(this->norm_desc));
    } // FIX: removed stray ';' that followed this member function body

    // Attach the finalized norm op to the operation graph. Must run before the
    // graph itself is finalized.
    void register_graph() {
        CHECK_CUDNN(cudnnBackendSetAttribute(this->op_graph, CUDNN_ATTR_OPERATIONGRAPH_OPS, CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &this->norm_desc));
    }
};
int main() {
cudnnHandle_t cudnn;
CHECK_CUDNN(cudnnCreate(&cudnn));
cudnnBackendDescriptor_t graph = init_graph(cudnn);
NormConfig norm_config = NormConfig(cudnn, graph);
norm_config.CreateNormDesc(4, 32, 16, 16, CUDNN_BATCH_NORM, CUDNN_NORM_FWD_TRAINING);
norm_config.register_graph();
finalize_graph(graph);
cudnnBackendDescriptor_t engine = create_engine_by_graph(graph);
struct EngineConfig config = engineConfigDescriptorCreate(engine);
return 0;
}
When I run this code, I get an error during the creation of the engineConfig. I suspect I might be using the Graph API incorrectly or missing some crucial steps.
Error Message
CUDNN Error: /path/to/file:87, reason: CUDNN_STATUS_NOT_SUPPORTED
Environment:
- CUDA version: 12.4
- cuDNN version: 9.0.0
- GPU: NVIDIA GeForce RTX 2080Ti
- OS: Ubuntu 22.04
- Compiler: GCC 9.3.0
- Build system: CMake 3.16.3
Questions:
- What are the correct steps to create an engineConfig when using cuDNN’s Graph API?
- Are there any common issues or things I should check that might be causing this error?
- Is there a way to correctly implement the Graph API using direct backend API calls without resorting to cudnn_frontend?
Any advice would be greatly appreciated. Thank you in advance for your help!