Critical defect: cudnnConvolutionForward() causes a subsequent cudaDeviceSynchronize() to return error 77 (cudaErrorIllegalAddress)

Hi All,

I am trying to use cuDNN in my framework, but a call to cudnnConvolutionForward with a specific parameter combination leads to a CUDA failure.

the code

// Repro / verification for a cudnnConvolutionForward failure that surfaced as
// cudaErrorIllegalAddress (77) at the next synchronizing CUDA call.
//
// Root cause: the filter buffer size omitted the input-channel factor. A 4D
// filter described as (out_channels, in_channels, size, size) needs
// out_channels * in_channels * size * size elements, but only
// out_channels * size * size were allocated — 512x too small — so the
// convolution kernel read far past the end of `weights`. Because CUDA errors
// are sticky and asynchronous, the fault was reported later, at cudaFree().
void test_cudnn_err() {
	cudnnTensorDescriptor_t input_desc = NULL, output_desc = NULL;
	cudnnFilterDescriptor_t weight_desc = NULL;
	cudnnConvolutionDescriptor_t conv_desc = NULL;
	cudnnConvolutionFwdAlgo_t fwd_algo;
	cudnnCreateTensorDescriptor(&input_desc);
	cudnnCreateTensorDescriptor(&output_desc);
	cudnnCreateFilterDescriptor(&weight_desc);
	cudnnCreateConvolutionDescriptor(&conv_desc);

	int mini_batch = 4;
	int in_channels = 512;
	int in_height = 26;
	int in_width = 26;
	int size = 3;       // filter spatial size (size x size)
	int padding = 1;
	int stride = 2;

	int out_channels = 1024;
	int out_height = 0, out_width = 0;

	void* workspace = NULL;
	size_t workspace_bytes = 0;
	float one = 1.0f, zero = 0.0f;

	// Input: NCHW float tensor.
	cudnnSetTensor4dDescriptor(input_desc,
		CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, mini_batch, in_channels, in_height, in_width);

	// Filter: (K, C, H, W) = (out_channels, in_channels, size, size).
	cudnnSetFilter4dDescriptor(weight_desc,
		CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, out_channels, in_channels, size, size);

	cudnnSetConvolution2dDescriptor(conv_desc,
		padding, padding, stride, stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);

	// Query the output shape; out_channels is written back (unchanged here).
	int temp = 0;
	cudnnGetConvolution2dForwardOutputDim(conv_desc,
			input_desc, weight_desc, &temp, &out_channels, &out_height, &out_width);
	cudnnSetTensor4dDescriptor(output_desc,
		CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, mini_batch, out_channels, out_height, out_width);
	cudnnGetConvolutionForwardAlgorithm(GetCUDNNHandle(),
		input_desc, weight_desc, conv_desc, output_desc, CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
		/*memoryLimitInBytes=*/0, &fwd_algo);

	// Buffer sizes. Widen to size_t before multiplying to avoid int overflow
	// on larger shapes.
	size_t in_bytes = (size_t)in_height * in_width * mini_batch * in_channels * sizeof(float);
	// FIX: the filter buffer must include the in_channels factor to match the
	// (out_channels, in_channels, size, size) filter descriptor above.
	size_t w_bytes = (size_t)size * size * in_channels * out_channels * sizeof(float);
	size_t out_bytes = (size_t)out_height * out_width * mini_batch * out_channels * sizeof(float);

	float* input = NULL;
	float* weights = NULL;
	float* output = NULL;
	cudaMalloc(&input, in_bytes);
	cudaMalloc(&weights, w_bytes);
	cudaMalloc(&output, out_bytes);

	cudnnGetConvolutionForwardWorkspaceSize(GetCUDNNHandle(),
		input_desc, weight_desc, conv_desc, output_desc, fwd_algo, &workspace_bytes);

	cudaMalloc(&workspace, workspace_bytes);

	cudnnStatus_t status = cudnnConvolutionForward(GetCUDNNHandle(), &one,
		input_desc, input, weight_desc, weights,
		conv_desc, fwd_algo, workspace, workspace_bytes,
		&zero, output_desc, output);
	if (status != CUDNN_STATUS_SUCCESS) {
		fprintf(stderr, "cudnnConvolutionForward failed: %s\n",
			cudnnGetErrorString(status));
	}
	// Synchronize here so any asynchronous execution error is reported at the
	// point of the convolution instead of a later, unrelated call.
	cudaError_t e = cudaDeviceSynchronize();
	if (e != cudaSuccess) {
		fprintf(stderr, "CUDA error after convolution: %s\n",
			cudaGetErrorString(e));
	}

	// Release device memory (workspace was leaked in the original).
	cudaFree(workspace);
	cudaFree(input);
	cudaFree(weights);
	cudaFree(output);

	if (input_desc) cudnnDestroyTensorDescriptor(input_desc);
	if (output_desc) cudnnDestroyTensorDescriptor(output_desc);
	if (weight_desc) cudnnDestroyFilterDescriptor(weight_desc);
	if (conv_desc) cudnnDestroyConvolutionDescriptor(conv_desc);
}

Environment: CUDA 9.2 with the latest cuDNN, v7.4.2.

Workarounds I found: either reduce out_channels to 256, or change

size_t in_bytes = in_height * in_width * mini_batch * in_channels * sizeof(float);

to

size_t in_bytes = in_height * in_width * mini_batch * in_channels * sizeof(float) * 4;

and the problem goes away.

However, why is the extra factor of 4 needed?

Sorry — I now realize the mistake I made. The weight buffer size was missing the in_channels factor; it should be:

size_t w_bytes = size * size * out_channels * in_channels * sizeof(float);

The undersized filter buffer caused the convolution to read out of bounds, and the sticky CUDA error was only reported at the next synchronizing call. (The "* 4" workaround merely over-allocated the input buffer enough to hide the overrun.)