I have implemented a batch normalization layer to be used after a 3D convolution. It only works for the first data point in the batch; for the rest of the data points it produces a constant result.
Dimensions:
Input shape - (16, 16, 16)
Batch Size - 32
NumInpChannels - 32
lstfWeights[0] - 32
lstfWeights[1] - 32
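The stride arrays used below (nlInpStrides, nlOutputStrides, nlNVBStrides) are not shown; for reference, a fully packed NCDHW layout would give strides along the lines of the following sketch (computePackedStrides is just an illustrative name, not part of my code):

// Sketch only: strides for a fully packed 5D NCDHW tensor,
// where stride[4] = 1 and stride[i] = stride[i+1] * dims[i+1].
static void computePackedStrides(const int dims[5], int strides[5])
{
    strides[4] = 1;
    for (int i = 3; i >= 0; --i)
        strides[i] = strides[i + 1] * dims[i + 1];
}
// e.g. computePackedStrides(nlInpDims, nlInpStrides);
//      computePackedStrides(nlMVBDims, nlNVBStrides);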
// Create descriptors for the input, the output, and the scale/bias/mean/variance tensors.
checkCudnnErr(cudnnCreateTensorDescriptor(&m_cudnnInpTensorDesc));
checkCudnnErr(cudnnCreateTensorDescriptor(&m_cudnnOutTensorDesc));
checkCudnnErr(cudnnCreateTensorDescriptor(&m_cudnnBiasMeanVarDesc));
// Input dims in NCDHW order.
int nlInpDims[5];
nlInpDims[0] = nBatchSize;
nlInpDims[1] = nNumInpChannels;
nlInpDims[2] = nlInpShape[0];
nlInpDims[3] = nlInpShape[1];
nlInpDims[4] = nlInpShape[2];

// Output shape is identical to the input shape (batch norm preserves dimensions).
m_nOutputShape = new int[5];
m_nOutputShape[0] = nBatchSize;
m_nOutputShape[1] = nNumInpChannels;
m_nOutputShape[2] = nlInpShape[0];
m_nOutputShape[3] = nlInpShape[1];
m_nOutputShape[4] = nlInpShape[2];
// CUDNN_BATCHNORM_SPATIAL: one scale/bias/mean/variance value per channel,
// so the corresponding descriptor is 1 x C x 1 x 1 x 1.
m_cudnnBatchNormMode = CUDNN_BATCHNORM_SPATIAL;
int nlMVBDims[5] = {1, nNumInpChannels, 1, 1, 1};
checkCudnnErr(cudnnSetTensorNdDescriptor(m_cudnnInpTensorDesc,
                                         cudnnDtype,
                                         5,
                                         nlInpDims,
                                         nlInpStrides));
checkCudnnErr(cudnnSetTensorNdDescriptor(m_cudnnOutTensorDesc,
                                         cudnnDtype,
                                         5,
                                         m_nOutputShape,
                                         nlOutputStrides));
checkCudnnErr(cudnnSetTensorNdDescriptor(m_cudnnBiasMeanVarDesc,
                                         CUDNN_DATA_FLOAT,
                                         5,
                                         nlMVBDims,
                                         nlNVBStrides));
const float alpha = 1.0f, beta = 0.0f;
checkCudnnErr(cudnnBatchNormalizationForwardInference(m_cudnnHandle,
                                                      m_cudnnBatchNormMode,
                                                      &alpha,
                                                      &beta,
                                                      m_cudnnInpTensorDesc,
                                                      lstfInputs[0],
                                                      m_cudnnOutTensorDesc,
                                                      lstfOutputs[0],
                                                      m_cudnnBiasMeanVarDesc,
                                                      lstfWeights[3],   // bnScale
                                                      lstfWeights[2],   // bnBias
                                                      lstfWeights[0],   // estimatedMean
                                                      lstfWeights[1],   // estimatedVariance
                                                      (double)1e-6));   // epsilon
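For comparison, cuDNN can also derive the scale/bias/mean/variance descriptor directly from the input descriptor and the batch-norm mode instead of filling nlMVBDims and nlNVBStrides by hand; a minimal sketch using the identifiers above:

// Sketch: let cuDNN derive the 1 x C x 1 x 1 x 1 descriptor from the input descriptor.
checkCudnnErr(cudnnDeriveBNTensorDescriptor(m_cudnnBiasMeanVarDesc,
                                            m_cudnnInpTensorDesc,
                                            m_cudnnBatchNormMode));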
Is the implementation correct?