Convolution error between different algorithms

#include <cuda_runtime.h>
#include <cudnn.h>
#include <stdio.h>
#include <stdlib.h>

// Reads up to `size` raw float values from the binary file `filename` into `pbuf`.
//
// pbuf     - destination buffer with room for at least `size` floats.
// size     - number of floats requested; nothing is done when size <= 0.
// filename - path of the file to read (opened read-only, binary mode).
//
// The function cannot report failure through its void return, so a file that
// cannot be opened or that holds fewer than `size` floats is reported on
// stderr and the unread tail of `pbuf` is zero-filled. The caller therefore
// never sees uninitialised memory.
void teReadDataFromDisk(float* pbuf, int size, const char* filename)
{
    if (pbuf == NULL || size <= 0)
    {
        return;
    }

    const size_t wanted = (size_t)size;
    size_t got = 0;
    FILE* pFile = NULL;

    if (filename != NULL)
    {
        // Plain "rb": the data is only read, so write access must not be required.
#if defined(_MSC_VER)
        if (fopen_s(&pFile, filename, "rb") != 0)
        {
            pFile = NULL;
        }
#else
        pFile = fopen(filename, "rb");
#endif
    }

    if (pFile == NULL)
    {
        fprintf(stderr, "teReadDataFromDisk: cannot open '%s'\n",
                filename != NULL ? filename : "(null)");
    }
    else
    {
        got = fread(pbuf, sizeof(float), wanted, pFile);
        fclose(pFile);
        if (got != wanted)
        {
            fprintf(stderr, "teReadDataFromDisk: '%s' holds %lu floats, %lu requested\n",
                    filename, (unsigned long)got, (unsigned long)wanted);
        }
    }

    // Give every element that was not read a defined value.
    for (size_t i = got; i < wanted; ++i)
    {
        pbuf[i] = 0.0f;
    }
}

void main()
{
const int BB = 1, CC = 128, WW = 16, HH = 48;
const int XSIZE = BB * CC * WW * HH;
const int WSIZE = CC * CC * 9;
cudaError_t cudaerr = cudaSetDevice(0);

cudnnHandle_t handle = nullptr;
cudnnStatus_t cudnnerr = cudnnCreate(&handle);

float fAlpha = 1.0f, fBeta = 0.0f;

cudnnTensorDescriptor_t xDesc, yDesc;
cudnnFilterDescriptor_t wDesc;
cudnnConvolutionDescriptor_t convDesc;

cudnnerr = cudnnCreateTensorDescriptor(&xDesc);
cudnnerr = cudnnCreateTensorDescriptor(&yDesc);
cudnnerr = cudnnCreateFilterDescriptor(&wDesc);
cudnnerr = cudnnCreateConvolutionDescriptor(&convDesc);

cudnnerr = cudnnSetTensor4dDescriptor(xDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, BB, CC, HH, WW);
cudnnerr = cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, BB, CC, HH, WW);
cudnnerr = cudnnSetFilter4dDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, CC, CC, 3, 3);

cudnnerr = cudnnSetConvolutionGroupCount(convDesc, 1);
//cudnnSetConvolutionMathType(convDesc, CUDNN_TENSOR_OP_MATH);
cudnnerr = cudnnSetConvolution2dDescriptor(convDesc, 1, 1, 1, 1, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);

//int b, c, w, h;
//cudnnerr = cudnnGetConvolution2dForwardOutputDim(convDesc, xDesc, wDesc, &b, &c, &h, &w);

cudnnConvolutionFwdAlgo_t algo = (cudnnConvolutionFwdAlgo_t)0;
size_t workspace_size = 0

cudnnerr = cudnnGetConvolutionForwardWorkspaceSize(handle, xDesc, wDesc, convDesc, yDesc,
	(cudnnConvolutionFwdAlgo_t)1, &workspace_size);//1769552

float *x_h = new float[XSIZE];
float *w_h = new float[WSIZE];
float *y_h1 = new float[XSIZE];
float *y_h2 = new float[XSIZE];
float *x_d, *w_d, *y_d, *workspace_d;

cudaerr = cudaMalloc(&x_d, XSIZE*4);
cudaerr = cudaMalloc(&y_d, XSIZE*4);
cudaerr = cudaMalloc(&w_d, WSIZE*4);
cudaerr = cudaMalloc(&workspace_d, workspace_size);

teReadDataFromDisk(x_h, XSIZE, "aaxx");
teReadDataFromDisk(w_h, WSIZE, "aaww");

cudaerr = cudaMemcpy(x_d, x_h, XSIZE * 4, cudaMemcpyHostToDevice);
cudaerr = cudaMemcpy(w_d, w_h, WSIZE * 4, cudaMemcpyHostToDevice);
 

cudnnerr = cudnnConvolutionForward(handle,
	&fAlpha, xDesc, x_d,
	wDesc, w_d,
	convDesc, algo, workspace_d, workspace_size,
	&fBeta,	yDesc, y_d);

cudaerr = cudaMemcpy(y_h1, y_d, XSIZE *4, cudaMemcpyDeviceToHost);

for (size_t ii = 0; ii < 20; ii++)
{
	printf("%f,", y_h1[ii]);
}
printf("\n\n");

algo = (cudnnConvolutionFwdAlgo_t)1;
cudnnerr = cudnnConvolutionForward(handle,
	&fAlpha, xDesc, x_d,
	wDesc, w_d,
	convDesc, algo, workspace_d, workspace_size,
	&fBeta, yDesc, y_d);

cudaerr = cudaMemcpy(y_h2, y_d, XSIZE *4, cudaMemcpyDeviceToHost);

for (size_t ii = 0; ii < 20; ii++)
{
	printf("%f,", y_h2[ii]);
}
printf("\n\n");

//destroy descriptors.
cudnnDestroy(handle);

system("Pause");

}

Hi,

Could you please give more details on the issue you're facing, such as logs and the cuDNN and CUDA versions you're using?
We also recommend trying the latest cuDNN version, 8.5.0.

Thank you.