CUDNN_STATUS_EXECUTION_FAILED error when calling the cudnnRNNForwardTrainingEx function.

I want to use cudnnRNNForwardTrainingEx with variable sequence lengths, but I get an error when I set the variable sequence lengths by calling

cudnnSetRNNDataDescriptor(_x_data_desc
                      , _cudnn_data_type
                      , CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED//curent fix time major
                      , step
                      , batch
                      , _input_size
                      , seq_len
                      , (void *)&fill_value //
                      ); 
       cudnnSetRNNDataDescriptor(_y_data_desc
                      , _cudnn_data_type
                      , CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED//curent fix time major
                      , step
                      , batch
                      , _hidden_size * _dir_count
                      , seq_len
                      , (void *)&fill_value //, NULL
                      );

Then I call

CudnnRunCheckRet(
                    cudnnRNNForwardTrainingEx(
                        _handle
                        , _rnn_desc
                        , _x_data_desc, gx->void_ptr()
                        , _ht_desc, init_ht
                        , _ct_desc, init_ct
                        , _par_desc, gw->void_ptr()
                        , _y_data_desc, gy->void_ptr()
                        , _ht_desc, out_ht
                        , _ct_desc, out_ct
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , g_work->void_ptr(), _work_size
                        , g_reser->void_ptr(), _reserver_size
                        ));

I get a CUDNN_STATUS_EXECUTION_FAILED error. It only runs correctly when every entry in seq_len equals step (the maximum sequence length). What is the problem?

Env Detail :
Tesla T4
NVIDIA-SMI 440.33.01 Driver Version: 440.33.01 CUDA Version: 10.2
Cudnn v7.6.5

Sample code detail :

auto rnn_direction_mode = _bi_direction ?
                     CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;

        CudnnRunCheckRet(cudnnDropoutGetStatesSize(_handle, &_dropout_state_size));
        CudaRunCheckRet(cudaMalloc(&_dropout_state_ptr, _dropout_state_size));
        CudnnRunCheckRet(cudnnSetDropoutDescriptor(_dropout_desc, _handle, 0.0f
                            , _dropout_state_ptr, _dropout_state_size, 0));
        CudnnRunCheckRet(cudnnSetRNNDescriptor_v6(_handle
                        , _rnn_desc
                        , _hidden_size, _stack_layer_num
                        , _dropout_desc, _rnn_input_mode
                        , rnn_direction_mode, _cudnn_rnn_mode
                        , CUDNN_RNN_ALGO_STANDARD
                        , _cudnn_data_type));
       CudnnRunCheckRet(
                cudnnSetRNNMatrixMathType(_rnn_desc, CUDNN_TENSOR_OP_MATH));
       float fill_value = 0;
        CudnnRunCheckRet(
                cudnnSetRNNDataDescriptor(_x_data_desc
                    , _cudnn_data_type
                    , CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED//curent fix time major
                    , step
                    , batch
                    , _input_size
                    , seq_len
                    , (void *)&fill_value //, NULL
                    ));
        CudnnRunCheckRet(
                cudnnSetRNNDataDescriptor(_y_data_desc
                    , _cudnn_data_type
                    , CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED//curent fix time major
                    , step
                    , batch
                    , _hidden_size * _dir_count
                    , seq_len
                    , (void *)&fill_value //, NULL
                    ));

         _x0_desc.set3dDesc(_cudnn_data_type, batch, _input_size, 1);
        _y0_desc.set3dDesc(_cudnn_data_type, batch, _hidden_size * _dir_count, 1);
        _state_size = 2 * _dir_count * _stack_layer_num * batch * _hidden_size ;
        _ht_desc.set3dDesc(_cudnn_data_type, _stack_layer_num * _dir_count, batch, _hidden_size);
        _ct_desc.set3dDesc(_cudnn_data_type, _stack_layer_num * _dir_count, batch, _hidden_size);
         CudnnRunCheckRet(
                    cudnnRNNForwardTrainingEx(
                        _handle
                        , _rnn_desc
                        , _x_data_desc, gx->void_ptr()
                        , _ht_desc, init_ht
                        , _ct_desc, init_ct
                        , _par_desc, gw->void_ptr()
                        , _y_data_desc, gy->void_ptr()
                        , _ht_desc, out_ht
                        , _ct_desc, out_ct
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , nullptr, nullptr
                        , g_work->void_ptr(), _work_size
                        , g_reser->void_ptr(), _reserver_size
                        ));

Can anyone tell me what the problem is? Thanks

Hi,

Could you please check whether your sequence lengths satisfy the condition below:
Each element in seqLengthArray must be greater than 0 but less than or equal to maxSeqLength. In the packed layout, the elements should be sorted in descending order.
https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnSetRNNDataDescriptor

Thanks

When all sequence lengths are set to the same value (max sequence length - 1), I get the CUDNN_STATUS_EXECUTION_FAILED error; it only runs correctly when they are all set to the max sequence length.

Hi,

Could you please share the sample repro script so we can help better?

Thanks