Hi,
I am running the simple cuDNN test code below (copied from "Minimal cuDNN C++ Example. A minimal 70-line cuDNN example that…" by Rohit Dwivedula on Medium):
#include <iostream>
#include <cuda_runtime.h>
#include <cudnn.h>
/**
* Minimal example to apply sigmoid activation on a tensor
* using cuDNN.
**/
int main(int argc, char** argv)
{
int numGPUs;
cudaGetDeviceCount(&numGPUs);
std::cout << "Found " << numGPUs << " GPUs." << std::endl;
cudaSetDevice(0); // use GPU0
int device;
struct cudaDeviceProp devProp;
cudaGetDevice(&device);
cudaGetDeviceProperties(&devProp, device);
std::cout << "Compute capability:" << devProp.major << "." << devProp.minor << std::endl;
cudnnHandle_t handle_;
cudnnCreate(&handle_);
std::cout << "Created cuDNN handle" << std::endl;
// create the tensor descriptor
cudnnDataType_t dtype = CUDNN_DATA_FLOAT;
cudnnTensorFormat_t format = CUDNN_TENSOR_NCHW;
int n = 1, c = 1, h = 1, w = 10;
int NUM_ELEMENTS = n*c*h*w;
cudnnTensorDescriptor_t x_desc;
cudnnCreateTensorDescriptor(&x_desc);
cudnnSetTensor4dDescriptor(x_desc, format, dtype, n, c, h, w);
// create the tensor
float *x;
cudaMallocManaged(&x, NUM_ELEMENTS * sizeof(float));
for(int i=0;i<NUM_ELEMENTS;i++) x[i] = i * 1.00f;
std::cout << "Original array: ";
for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " ";
// create activation function descriptor
float alpha[1] = {1};
float beta[1] = {0.0};
cudnnActivationDescriptor_t sigmoid_activation;
cudnnActivationMode_t mode = CUDNN_ACTIVATION_SIGMOID;
cudnnNanPropagation_t prop = CUDNN_NOT_PROPAGATE_NAN;
cudnnCreateActivationDescriptor(&sigmoid_activation);
cudnnSetActivationDescriptor(sigmoid_activation, mode, prop, 0.0f);
cudnnActivationForward(
handle_,
sigmoid_activation,
alpha,
x_desc,
x,
beta,
x_desc,
x
);
cudnnDestroy(handle_);
std::cout << std::endl << "Destroyed cuDNN handle." << std::endl;
std::cout << "New array: ";
for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " ";
std::cout << std::endl;
cudaFree(x);
return 0;
}
But it blocks indefinitely in cudnnCreate, and gdb prints the following stack trace:
0x0000ffffc5e1fb78 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
(gdb) bt
#0 0x0000ffffc5e1fb78 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#1 0x0000ffffc5e6b6fc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#2 0x0000ffffc5e73588 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#3 0x0000ffffc5e737e4 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#4 0x0000ffffc5d925c4 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#5 0x0000ffffc5d92730 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#6 0x0000ffffc5d6f730 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#7 0x0000ffffc5d70024 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#8 0x0000ffffc5f184d8 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#9 0x0000ffffc5f18544 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#10 0x0000ffffc5c49554 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#11 0x0000ffffc5c51134 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#12 0x0000ffffc5c54ab0 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#13 0x0000ffffc5c56080 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#14 0x0000ffffc5c4a840 in __cuda_CallJitEntryPoint () from /usr/lib/aarch64-linux-gnu/tegra/libnvidia-ptxjitcompiler.so.1
#15 0x0000fffff67145f0 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#16 0x0000fffff6746ccc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#17 0x0000fffff65bc6b0 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#18 0x0000fffff654870c in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#19 0x0000fffff6548d5c in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#20 0x0000ffffc87e2e0c in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#21 0x0000ffffc87d3600 in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#22 0x0000ffffc87e9484 in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#23 0x0000ffffc87eaa7c in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#24 0x0000ffffc87eafdc in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#25 0x0000ffffc87e09dc in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#26 0x0000ffffc87c2780 in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#27 0x0000ffffc87f7a2c in ?? () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#28 0x0000ffffc71bd090 in cudnnCreate () from /lib/aarch64-linux-gnu/libcudnn_ops_infer.so.8
#29 0x0000aaaaaaaa11b8 in main ()
(gdb)
Jetpack version: 5.1.2
CUDA version: 11.4
CUDNN version: 8.2.4
I think the code should be fine, since it works on another board running JetPack 5.1.1 with the same CUDA version.
Can anyone help me? Thanks a lot!
BR/Time