I use evolutionary method to train LSTM network, calculate and assign weights and bias values to the network according to the example method(cuDNN-sample/RNN_example.cu at master · Hardware-Alchemy/cuDNN-sample · GitHub). But netparamstotal = weight_ size / size of (float) is inconsistent with the sum of weightscounts and biascounts. Why? How to calculate and assign network parameters correctly?
In addition, change the bias mode (CUDNN_ RNN_ BIAS/CUDNN_ RNN_ DOUBLE_BIAS) Why is netparamstotal unchanged?
Code snippet:
size_t weight_size;
checkCudnnErrors(cudnnGetRNNWorkspaceSize(cudnnHandle, rnn_desc, seq_length, x_desc, &workspace_size));
checkCudnnErrors(cudnnGetRNNParamsSize(cudnnHandle, rnn_desc, x_desc[0], &weight_size, CUDNN_DATA_FLOAT));
checkCudaErrors(cudaMalloc((void **) &weights, weight_size));
checkCudaErrors(cudaMalloc((void **) &workspace, workspace_size));
// initialize filter descriptors
// cudnnFilterDescriptor_t w_desc;
netParamsTotal=weight_size / sizeof(float);
int dimW[] = {static_cast<int>(weight_size / sizeof(float)), 1, 1};
checkCudnnErrors(cudnnCreateFilterDescriptor(&w_desc));
checkCudnnErrors(cudnnSetFilterNdDescriptor(w_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));
// weights
size_t linearIndex = 0;
for (int layer = 0; layer < num_layers; layer++) {
cudnnDataType_t data_type;
cudnnTensorFormat_t format;
int nb_dim, filter_dim[3];
cudnnFilterDescriptor_t linear_filter_desc, linear_bias_desc;
float *linear_layer_filter, *linear_bias=nullptr;
for (int linear_layer = 0; linear_layer < num_linear_layers; ++linear_layer) {
// filter
checkCudnnErrors(cudnnCreateFilterDescriptor(&linear_filter_desc));
checkCudnnErrors(cudnnGetRNNLinLayerMatrixParams(cudnnHandle, rnn_desc, layer, x_desc[0],
w_desc, weights, linear_layer, linear_filter_desc,
(void **) &linear_layer_filter));
checkCudnnErrors(
cudnnGetFilterNdDescriptor(linear_filter_desc, 3, &data_type, &format, &nb_dim, filter_dim));
weightsCounts[linearIndex] = filter_dim[0] * filter_dim[1] * filter_dim[2];
linearLayerFilters[linearIndex] = linear_layer_filter;
// bias
if(biasMode!=CUDNN_RNN_NO_BIAS){
checkCudnnErrors(cudnnCreateFilterDescriptor(&linear_bias_desc));
checkCudnnErrors(cudnnGetRNNLinLayerBiasParams(cudnnHandle, rnn_desc, layer,
x_desc[0], w_desc, weights, linear_layer, linear_bias_desc,
(void **) &linear_bias));
checkCudnnErrors(cudnnGetFilterNdDescriptor(linear_bias_desc, 3, &data_type, &format, &nb_dim, filter_dim));
biasCounts[linearIndex] = filter_dim[0] * filter_dim[1] * filter_dim[2];
linearBiases[linearIndex] = linear_bias;
}else{
biasCounts[linearIndex] = 0;
linearBiases[linearIndex] = nullptr;
}
++linearIndex;
checkCudnnErrors(cudnnDestroyFilterDescriptor(linear_filter_desc));
if(biasMode!=CUDNN_RNN_NO_BIAS){
checkCudnnErrors(cudnnDestroyFilterDescriptor(linear_bias_desc));
}
}
}