cudnnConvolutionBiasActivationForward is throwing a std::logic_error for me on the NX with Jetpack 4.4DP, while working on both the TX2 (earlier Jetpack) and the desktop (cudnn 7.6). It also works if I break up the operation into cudnnConvolutionForward, cudnnAddTensor, cudnnActivationForward.
Backtrace:
terminate called after throwing an instance of 'std::logic_error'
what(): basic_string::_M_construct null not valid
Thread 1 "cudnn_test" received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x0000007fa67e58b4 in __GI_abort () at abort.c:79
#2 0x0000007fa66b110c in __gnu_cxx::__verbose_terminate_handler() ()
from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#3 0x0000007fa66aec54 in ?? () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#4 0x0000007fa66aeca0 in std::terminate() () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#5 0x0000007fa66aef68 in __cxa_throw () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#6 0x0000007fa66d76e8 in std::__throw_logic_error(char const*) ()
from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#7 0x0000007fa112d394 in cudnn::gemm::SassTreeOperator::insert(cudnn::gemm::ShaderParams const*, int) () from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#8 0x0000007fa112d570 in cudnn::gemm::SassTreeOperator::build() ()
from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#9 0x0000007fa112d868 in cudnn::gemm::SassTreeOperator::SassTreeOperator(cudnnContext*) ()
from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#10 0x0000007fa11164a8 in cudnn::gemm::conv2dFindConfig(cudnnContext*, cudnnTensor4dStruct*, cudnnFilter4dStruct*, cudnnConvolutionStruct*, cudnnTensor4dStruct*, cudnn::gemm::Conv2dType_t, cudnn::gemm::Conv2dConfig&) () from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#11 0x0000007fb05118b8 in cudnnConvolution4dForward(cudnnContext*, void const*, cudnnTensorStruct*, void const*, cudnnFilterStruct*, void const*, cudnnConvolutionStruct*, cudnnConvWorkingStruct const*, cudnnConvolutionFwdAlgo_t, void*, unsigned long, bool, void const*, void*, void const*, cudnnActivationStruct*, cudnnTensorStruct*, void*) () from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#12 0x0000007fafbd6878 in ?? () from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#13 0x0000007fb0359b40 in cudnnConvolutionBiasActivationForward ()
from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#14 0x000000555555dff0 in main (argc=1, argv=0x7ffffff0d8) at cudnn_test.cpp:62
A minimal example:
//nvcc -g -arch=sm_72 -o cudnn_test cudnn_test.cpp -lcudnn_cnn_infer -lcudnn_ops_infer
#include <stdio.h>
#include <stdexcept>
//#include <cudnn.h>
#include <cuda_runtime.h>
#include <cudnn_cnn_infer_v8.h>
#include <cudnn_ops_infer_v8.h>
// Status check for CUDA / cuDNN calls: both cudaError_t and cudnnStatus_t
// convert to int, with 0 meaning success.  On failure, print file, line and
// the raw error code, then bail out of main() with that code.
// (f) is parenthesized so arguments containing commas/ternaries/assignments
// expand safely; do-while(0) makes the macro usable as a single statement.
#define CHK(f) do { int rc = (f); if (rc) { printf("%s:%d: err %d\n", __FILE__, __LINE__, rc); \
return rc; } } while(0)
// Minimal reproducer: one fused conv + bias + ReLU over a 1x4x1024x1024
// fp16 NCHW input.  The exact call order and values are the repro itself;
// do not reorder or "clean up" when re-running it.
int main(int argc, char *argv[])
{
// Problem geometry: 32 filters of 3x3 over 4 channels, pad 1.
// stride is 1 here (the commented //2 is the strided variant); the input
// below is sized height*stride x width*stride so the output stays
// height x width for either setting.
const int height=1024, width=1024;
const int nfilt=32, nchan=4, fsz=3, pad=1, stride=1;//2;
// alpha/beta scaling factors passed to the cuDNN calls.
const float _one = 1, _zero = 0;
// Data in fp16; the convolution accumulates in fp32 (_ctype).
const cudnnDataType_t _dtype = CUDNN_DATA_HALF;
const cudnnDataType_t _ctype = CUDNN_DATA_FLOAT; //HALF;
const cudnnTensorFormat_t _format = CUDNN_TENSOR_NCHW; //NHWC;
const cudnnConvolutionMode_t _mode = CUDNN_CROSS_CORRELATION;
const cudnnNanPropagation_t _nan_prop = CUDNN_NOT_PROPAGATE_NAN;
const cudnnConvolutionFwdPreference_t _algo_pref = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
// Device buffers: X input, Y output, W filters (with the bias b packed
// into the tail of the same allocation), T workspace.
unsigned char *X, *Y, *W, *T, *b;
cudnnHandle_t _cudnn_h = nullptr;
cudnnTensorDescriptor_t Xt, Yt, bt;
cudnnFilterDescriptor_t Wt;
cudnnConvolutionDescriptor_t op;
cudnnConvolutionFwdAlgo_t algo;
cudnnActivationDescriptor_t actf;
size_t wssz;
int ydim[4];
// Trailing *2 factors are sizeof(half); the bias region gets nfilt*4
// bytes (room for fp32) even though its descriptor below is half.
CHK(cudaMalloc(&X, nchan * height*stride * width*stride *2));
CHK(cudaMalloc(&Y, nfilt * height * width *2));
CHK(cudaMalloc(&W, nfilt * nchan * fsz * fsz *2 + nfilt *4));
// Bias starts right after the filter weights.
b = W + nfilt*nchan*fsz*fsz*2;
// Byte pattern 0x3c fills each half with 0x3c3c (~1.06); output zeroed.
CHK(cudaMemset(X, 0x3c, nchan * height*stride * width*stride *2));
CHK(cudaMemset(Y, 0x00, nfilt * height * width *2));
CHK(cudaMemset(W, 0x3c, nfilt * nchan * fsz * fsz *2 + nfilt *4));
CHK( cudnnCreate(&_cudnn_h) );
// Descriptors for input, output, bias, filter, the convolution, and the
// ReLU activation used by the fused call.
CHK( cudnnCreateTensorDescriptor(&Xt) );
CHK( cudnnCreateTensorDescriptor(&Yt) );
CHK( cudnnCreateTensorDescriptor(&bt) );
CHK( cudnnCreateFilterDescriptor(&Wt) );
CHK( cudnnCreateConvolutionDescriptor(&op) );
CHK( cudnnCreateActivationDescriptor(&actf) );
CHK( cudnnSetTensor4dDescriptor(Xt, _format, _dtype, 1, nchan, height*stride, width*stride) );
CHK( cudnnSetFilter4dDescriptor(Wt, _dtype, _format, nfilt,nchan,fsz,fsz) );
CHK( cudnnSetTensor4dDescriptor(bt, _format, _dtype, 1,nfilt,1,1) );
CHK( cudnnSetActivationDescriptor(actf, CUDNN_ACTIVATION_RELU, _nan_prop, 0.) );
CHK( cudnnSetConvolution2dDescriptor(op, pad,pad , stride,stride , 1,1 , _mode, _ctype) );
// Output dims are derived from the conv geometry and used to size Yt.
CHK( cudnnGetConvolution2dForwardOutputDim(op, Xt, Wt, ydim+0,ydim+1,ydim+2,ydim+3) );
CHK( cudnnSetTensor4dDescriptor(Yt, _format, _dtype, ydim[0],ydim[1],ydim[2],ydim[3]) );
// NOTE(review): legacy algorithm-selection API; it was dropped in later
// cuDNN 8 releases — confirm it still exists in the target headers.
CHK( cudnnGetConvolutionForwardAlgorithm(_cudnn_h, Xt, Wt, op, Yt, _algo_pref, 0, &algo) );
CHK( cudnnGetConvolutionForwardWorkspaceSize(_cudnn_h, Xt, Wt, op, Yt, algo, &wssz) );
CHK( cudaMalloc(&T, wssz) );
#if 1 //Throws std::logic_error "basic_string::_M_construct null not valid"
// Fused path: Y = ReLU(conv(X,W) + b), with the residual/z term scaled
// by _zero.  This is the call that aborts on the NX / JetPack 4.4DP
// build of cuDNN 8.
CHK( cudnnConvolutionBiasActivationForward(_cudnn_h, &_one, Xt,X, Wt,W, op,algo,
T,wssz, &_zero, Yt,Y, bt,b, actf, Yt,Y) );
#else //Works:
// Equivalent unfused sequence, which runs fine on the same setup.
CHK( cudnnConvolutionForward(_cudnn_h, &_one, Xt,X, Wt,W, op, algo, T,wssz, &_zero, Yt,Y) );
CHK( cudnnAddTensor(_cudnn_h, &_one,bt,b, &_one,Yt,Y) );
CHK( cudnnActivationForward(_cudnn_h, actf, &_one, Yt,Y, &_zero, Yt,Y) );
#endif
// Minimal repro: allocations and descriptors are deliberately not freed
// before exit.
return 0;
}
If there is a better place to report issues such as this, please let me know.