NX JetPack 4.4: cuDNN internal logic error

cudnnConvolutionBiasActivationForward is throwing a std::logic_error for me on the NX with JetPack 4.4 DP (cuDNN 8.0), while the same code works on both the TX2 (earlier JetPack) and the desktop (cuDNN 7.6). It also works if I break the operation up into cudnnConvolutionForward, cudnnAddTensor, and cudnnActivationForward (see the #else branch in the repro below).

Backtrace:

terminate called after throwing an instance of 'std::logic_error'
  what():  basic_string::_M_construct null not valid

Thread 1 "cudnn_test" received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51	../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x0000007fa67e58b4 in __GI_abort () at abort.c:79
#2  0x0000007fa66b110c in __gnu_cxx::__verbose_terminate_handler() ()
   from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#3  0x0000007fa66aec54 in ?? () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#4  0x0000007fa66aeca0 in std::terminate() () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#5  0x0000007fa66aef68 in __cxa_throw () from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#6  0x0000007fa66d76e8 in std::__throw_logic_error(char const*) ()
   from /usr/lib/aarch64-linux-gnu/libstdc++.so.6
#7  0x0000007fa112d394 in cudnn::gemm::SassTreeOperator::insert(cudnn::gemm::ShaderParams const*, int) () from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#8  0x0000007fa112d570 in cudnn::gemm::SassTreeOperator::build() ()
   from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#9  0x0000007fa112d868 in cudnn::gemm::SassTreeOperator::SassTreeOperator(cudnnContext*) ()
   from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#10 0x0000007fa11164a8 in cudnn::gemm::conv2dFindConfig(cudnnContext*, cudnnTensor4dStruct*, cudnnFilter4dStruct*, cudnnConvolutionStruct*, cudnnTensor4dStruct*, cudnn::gemm::Conv2dType_t, cudnn::gemm::Conv2dConfig&) () from /usr/lib/aarch64-linux-gnu/libcudnn_etc.so.8
#11 0x0000007fb05118b8 in cudnnConvolution4dForward(cudnnContext*, void const*, cudnnTensorStruct*, void const*, cudnnFilterStruct*, void const*, cudnnConvolutionStruct*, cudnnConvWorkingStruct const*, cudnnConvolutionFwdAlgo_t, void*, unsigned long, bool, void const*, void*, void const*, cudnnActivationStruct*, cudnnTensorStruct*, void*) () from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#12 0x0000007fafbd6878 in ?? () from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#13 0x0000007fb0359b40 in cudnnConvolutionBiasActivationForward ()
   from /usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer.so.8
#14 0x000000555555dff0 in main (argc=1, argv=0x7ffffff0d8) at cudnn_test.cpp:62

A minimal example:

//nvcc -g -arch=sm_72 -o cudnn_test cudnn_test.cpp -lcudnn_cnn_infer -lcudnn_ops_infer
#include <stdio.h>
#include <stdexcept>
//#include <cudnn.h>
#include <cuda_runtime.h>
#include <cudnn_cnn_infer_v8.h>
#include <cudnn_ops_infer_v8.h>

#define CHK(f) do { int rc = f; if (rc) { printf("%s:%d: err %d\n", __FILE__, __LINE__, rc); \
                                          return rc; } } while(0)

int main(int argc, char *argv[])
{
	const int height=1024, width=1024;
	const int nfilt=32, nchan=4, fsz=3, pad=1, stride=1;//2;
	const float _one = 1, _zero = 0;
	
	const cudnnDataType_t        _dtype	= CUDNN_DATA_HALF;
	const cudnnDataType_t        _ctype	= CUDNN_DATA_FLOAT;  //HALF;
	const cudnnTensorFormat_t    _format	= CUDNN_TENSOR_NCHW; //NHWC;
	const cudnnConvolutionMode_t _mode	= CUDNN_CROSS_CORRELATION;
	const cudnnNanPropagation_t  _nan_prop	= CUDNN_NOT_PROPAGATE_NAN;
	const cudnnConvolutionFwdPreference_t _algo_pref = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
	
	unsigned char *X, *Y, *W, *T, *b;
	cudnnHandle_t _cudnn_h = nullptr;
	cudnnTensorDescriptor_t Xt, Yt, bt;
	cudnnFilterDescriptor_t Wt;
	cudnnConvolutionDescriptor_t op;
	cudnnConvolutionFwdAlgo_t algo;
	cudnnActivationDescriptor_t actf;
	size_t wssz;
	int ydim[4];
	
	// FP16 buffers: 2 bytes/element; the bias lives after the weights in
	// the same allocation, with 4 bytes/element reserved for it.
	CHK(cudaMalloc(&X, nchan * height*stride * width*stride *2));
	CHK(cudaMalloc(&Y, nfilt * height        * width        *2));
	CHK(cudaMalloc(&W, nfilt * nchan * fsz * fsz *2 + nfilt *4));
	b = W + nfilt*nchan*fsz*fsz*2;
	// 0x3c3c is roughly 1.06 in FP16, so inputs and weights are non-trivial.
	CHK(cudaMemset(X, 0x3c, nchan * height*stride * width*stride *2));
	CHK(cudaMemset(Y, 0x00, nfilt * height        * width        *2));
	CHK(cudaMemset(W, 0x3c, nfilt * nchan * fsz * fsz *2 + nfilt *4));
	
	CHK( cudnnCreate(&_cudnn_h) );
	CHK( cudnnCreateTensorDescriptor(&Xt) );
	CHK( cudnnCreateTensorDescriptor(&Yt) );
	CHK( cudnnCreateTensorDescriptor(&bt) );
	CHK( cudnnCreateFilterDescriptor(&Wt) );
	CHK( cudnnCreateConvolutionDescriptor(&op) );
	CHK( cudnnCreateActivationDescriptor(&actf) );
	
	CHK( cudnnSetTensor4dDescriptor(Xt, _format, _dtype, 1, nchan, height*stride, width*stride) );
	CHK( cudnnSetFilter4dDescriptor(Wt, _dtype, _format, nfilt,nchan,fsz,fsz) );
	CHK( cudnnSetTensor4dDescriptor(bt, _format, _dtype, 1,nfilt,1,1) );
	CHK( cudnnSetActivationDescriptor(actf, CUDNN_ACTIVATION_RELU, _nan_prop, 0.) );
	CHK( cudnnSetConvolution2dDescriptor(op, pad,pad , stride,stride , 1,1 , _mode, _ctype) );
	CHK( cudnnGetConvolution2dForwardOutputDim(op, Xt, Wt, ydim+0,ydim+1,ydim+2,ydim+3) );
	CHK( cudnnSetTensor4dDescriptor(Yt, _format, _dtype, ydim[0],ydim[1],ydim[2],ydim[3]) );
	CHK( cudnnGetConvolutionForwardAlgorithm(_cudnn_h, Xt, Wt, op, Yt, _algo_pref, 0, &algo) );
	CHK( cudnnGetConvolutionForwardWorkspaceSize(_cudnn_h, Xt, Wt, op, Yt, algo, &wssz) );
	CHK( cudaMalloc(&T, wssz) );
	
#if 1 //Throws std::logic_error "basic_string::_M_construct null not valid"

	CHK( cudnnConvolutionBiasActivationForward(_cudnn_h, &_one, Xt,X, Wt,W, op,algo,
					T,wssz, &_zero, Yt,Y, bt,b, actf, Yt,Y) );
#else //Works:
	CHK( cudnnConvolutionForward(_cudnn_h, &_one, Xt,X, Wt,W, op, algo, T,wssz, &_zero, Yt,Y) );
	CHK( cudnnAddTensor(_cudnn_h, &_one,bt,b, &_one,Yt,Y) );
	CHK( cudnnActivationForward(_cudnn_h, actf, &_one, Yt,Y, &_zero, Yt,Y) );
#endif
	return 0;
}

If there is a better place to report issues like this, please let me know.

I ran into the same issue in my project. I found a workaround by manually installing cuDNN 7.6, and it works just fine. I think the new cuDNN 8.0 may have an issue.
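For anyone checking which cuDNN their binary actually picks up after a manual install, here is a minimal sketch (an illustrative addition, not code from this thread; it assumes the umbrella cudnn.h header is installed):

//nvcc -o cudnn_version cudnn_version.cpp -lcudnn
#include <stdio.h>
#include <cudnn.h>

int main(void)
{
	// CUDNN_VERSION is the headers' version; cudnnGetVersion() reports the
	// library actually loaded at runtime. A mismatch means the manually
	// installed copy is being shadowed by another one on the loader path.
	printf("compiled against cuDNN %d, loaded cuDNN %zu\n",
	       (int)CUDNN_VERSION, cudnnGetVersion());
	return 0;
}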

Hi,

Thanks for the sample.
This looks like a regression to us as well.

We will try to reproduce this and share more information later.
Thanks.

Hi,

Thanks for your patience.

We found that this issue is related to IMMA kernel support on sm_72 devices.
The support will be added in our next release.

For now, please set your forward algorithm to 0 (CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM) to skip this error.

#if 1 //Throws std::logic_error "basic_string::_M_construct null not valid"
        algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
        CHK( cudnnConvolutionBiasActivationForward(_cudnn_h, &_one, Xt,X, Wt,W, op,algo,
                                        T,wssz, &_zero, Yt,Y, bt,b, actf, Yt,Y) );
#else //Works:
        ...

Thanks.

Thanks for the fix. It now works with cuDNN 8. However, my AI program's performance dropped by 50%; if I go back to cuDNN 7.6, the performance doubles. Hopefully the formal fix in cuDNN 8 will not have this performance issue.
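For reference, one way to measure that drop is to wrap the fused call with CUDA events. This is a sketch reusing the variables and CHK macro from the repro above (the event timing is illustrative, not code from this thread):

	cudaEvent_t t0, t1;
	float ms = 0;
	CHK( cudaEventCreate(&t0) );
	CHK( cudaEventCreate(&t1) );
	// Note: the first call includes one-time setup cost; run the op once
	// beforehand for a steadier number.
	CHK( cudaEventRecord(t0) );
	CHK( cudnnConvolutionBiasActivationForward(_cudnn_h, &_one, Xt,X, Wt,W, op,algo,
					T,wssz, &_zero, Yt,Y, bt,b, actf, Yt,Y) );
	CHK( cudaEventRecord(t1) );
	CHK( cudaEventSynchronize(t1) );	// wait for the kernel before reading the timer
	CHK( cudaEventElapsedTime(&ms, t0, t1) );
	printf("fused conv+bias+relu: %.3f ms\n", ms);

Comparing this number between CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM and the algorithm picked on cuDNN 7.6 would make the regression concrete for the eventual fix.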
