Hello,
The core function is below:
[code]ICudaEngine* createMNISTEngine(int maxBatchSize, IBuilder* builder, DataType dt, size_t input_h, size_t input_w, size_t label_cnt, std::string model_weights)
{
INetworkDefinition* network = builder->createNetwork();
std::map<std::string, Weights> weightMap = loadWeights(model_weights);
std::cout << "load weights finished" << std::endl;
// Create input tensor with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims4{1, 1, static_cast<int>(input_h), static_cast<int>(input_w)}); // cast: brace-init of Dims4 from size_t is a narrowing error
assert(data);
// Create scale layer with default power/shift and specified scale parameter.
const float scaleParam = 1.0f; // identity; use 0.00390625f (1/256) to normalize raw 8-bit input instead
const Weights power{DataType::kFLOAT, nullptr, 0};
const Weights shift{DataType::kFLOAT, nullptr, 0};
const Weights scale{DataType::kFLOAT, &scaleParam, 1};
IScaleLayer* scale_1 = network->addScale(*data, ScaleMode::kUNIFORM, shift, scale, power);
assert(scale_1);
// CNN backbone
// conv0_1
scale_1->getOutput(0)->setName("conv0_1_input");
IConvolutionLayer* conv0_1 = network->addConvolution(*scale_1->getOutput(0), 32, DimsHW{3, 3}, weightMap["FeatureExtraction.ConvNet.conv0_1.weight"], weightMap["FeatureExtraction.ConvNet.conv0_1.bias"]);
assert(conv0_1);
conv0_1->setStride(DimsHW{1, 1});
conv0_1->setPadding(DimsHW{1, 1});
// bn0_1
IScaleLayer* bn0_1 = network->addScale(*conv0_1->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn0_1.shift"], weightMap["FeatureExtraction.ConvNet.bn0_1.scale"], power);
assert(bn0_1);
IActivationLayer* relu0_1 = network->addActivation(*bn0_1->getOutput(0), ActivationType::kRELU);
assert(relu0_1);
// conv0_2 (declaration restored; 64 filters assumed from the 32 -> 64 -> 128 -> 256 -> 512 channel progression of this backbone)
IConvolutionLayer* conv0_2 = network->addConvolution(*relu0_1->getOutput(0), 64, DimsHW{3, 3}, weightMap["FeatureExtraction.ConvNet.conv0_2.weight"], weightMap["FeatureExtraction.ConvNet.conv0_2.bias"]);
assert(conv0_2);
conv0_2->setStride(DimsHW{1, 1});
conv0_2->setPadding(DimsHW{1, 1});
IScaleLayer* bn0_2 = network->addScale(*conv0_2->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn0_2.shift"], weightMap["FeatureExtraction.ConvNet.bn0_2.scale"], power);
assert(bn0_2);
IActivationLayer* relu0_2 = network->addActivation(*bn0_2->getOutput(0), ActivationType::kRELU);
assert(relu0_2);
// pool1: max pooling with 2x2 kernel and 2x2 stride (declaration restored)
IPoolingLayer* pool1 = network->addPooling(*relu0_2->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
assert(pool1);
pool1->setStride(DimsHW{2, 2});
pool1->setPadding(DimsHW{0, 0});
// layer1
ITensor* layer1 = getLayer(network, weightMap, *pool1->getOutput(0), 128, 1, 1, true);
// conv1 + bn1 + relu
IConvolutionLayer* conv1 = network->addConvolution(*layer1, 128, DimsHW{3, 3}, weightMap["FeatureExtraction.ConvNet.conv1.weight"], weightMap["FeatureExtraction.ConvNet.conv1.bias"]);
assert(conv1);
conv1->setStride(DimsHW{1, 1});
conv1->setPadding(DimsHW{1, 1});
IScaleLayer* bn1 = network->addScale(*conv1->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn1.shift"], weightMap["FeatureExtraction.ConvNet.bn1.scale"], power);
IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
assert(relu1);
// maxpool2
IPoolingLayer* pool2 = network->addPooling(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
pool2->setStride(DimsHW{2, 2});
ITensor* layer2 = getLayer(network, weightMap, *pool2->getOutput(0), 256, 2, 2, true);
// conv2 + bn2 + relu2 (conv2 declaration restored; 256 filters assumed to match layer2)
IConvolutionLayer* conv2 = network->addConvolution(*layer2, 256, DimsHW{3, 3}, weightMap["FeatureExtraction.ConvNet.conv2.weight"], weightMap["FeatureExtraction.ConvNet.conv2.bias"]);
assert(conv2);
conv2->setStride(DimsHW{1, 1});
conv2->setPadding(DimsHW{1, 1});
IScaleLayer* bn2 = network->addScale(*conv2->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn2.shift"], weightMap["FeatureExtraction.ConvNet.bn2.scale"], power);
assert(bn2);
IActivationLayer* relu2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
assert(relu2);
// maxpool3
IPoolingLayer* pool3 = network->addPooling(*relu2->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
assert(pool3);
pool3->setStride(DimsHW{2, 1});
pool3->setPadding(DimsHW{0, 1});
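// From pool3 onward, strides halve only the height; width (the future time
// axis) keeps stride 1 to preserve horizontal resolution for the sequence.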
// layer3
ITensor* layer3 = getLayer(network, weightMap, *pool3->getOutput(0), 512, 5, 3, true);
// conv3 + bn3 + relu
IConvolutionLayer* conv3 = network->addConvolution(*layer3, 512, DimsHW{3, 3}, weightMap["FeatureExtraction.ConvNet.conv3.weight"], weightMap["FeatureExtraction.ConvNet.conv3.bias"]);
assert(conv3);
conv3->setStride(DimsHW{1, 1});
conv3->setPadding(DimsHW{1, 1});
IScaleLayer* bn3 = network->addScale(*conv3->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn3.shift"], weightMap["FeatureExtraction.ConvNet.bn3.scale"], power);
assert(bn3);
IActivationLayer* relu3 = network->addActivation(*bn3->getOutput(0), ActivationType::kRELU);
assert(relu3);
// layer4 (declaration restored; block count of 3 assumed from the [1, 2, 5, 3] ResNet block layout)
ITensor* layer4 = getLayer(network, weightMap, *relu3->getOutput(0), 512, 3, 4, false);
// conv4_1 + bn4_1 + conv4_2 + bn4_2
IConvolutionLayer* conv4_1 = network->addConvolution(*layer4, 512, DimsHW{2, 2}, weightMap["FeatureExtraction.ConvNet.conv4_1.weight"], weightMap["FeatureExtraction.ConvNet.conv4_1.bias"]);
assert(conv4_1);
conv4_1->setStride(DimsHW{2, 1});
conv4_1->setPadding(DimsHW{0, 1});
IScaleLayer* bn4_1 = network->addScale(*conv4_1->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn4_1.shift"], weightMap["FeatureExtraction.ConvNet.bn4_1.scale"], power);
assert(bn4_1);
IActivationLayer* relu4_1 = network->addActivation(*bn4_1->getOutput(0), ActivationType::kRELU);
assert(relu4_1);
IConvolutionLayer* conv4_2 = network->addConvolution(*relu4_1->getOutput(0), 512, DimsHW{2, 2}, weightMap["FeatureExtraction.ConvNet.conv4_2.weight"], weightMap["FeatureExtraction.ConvNet.conv4_2.bias"]);
assert(conv4_2);
conv4_2->setStride(DimsHW{1, 1});
conv4_2->setPadding(DimsHW{0, 0});
IScaleLayer* bn4_2 = network->addScale(*conv4_2->getOutput(0), ScaleMode::kCHANNEL, weightMap["FeatureExtraction.ConvNet.bn4_2.shift"], weightMap["FeatureExtraction.ConvNet.bn4_2.scale"], power);
assert(bn4_2);
IActivationLayer* relu4_2 = network->addActivation(*bn4_2->getOutput(0), ActivationType::kRELU);
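// The backbone output is a (C=512, H, W) map with H reduced to 1 for the
// expected input height; permute so width (time) leads, then flatten to one
// 512-d feature vector per time step for the LSTMs.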
auto permuted_data = network->addShuffle(*relu4_2->getOutput(0));
assert(permuted_data);
permuted_data->setFirstTranspose(nvinfer1::Permutation{0, 3, 1, 2});
permuted_data->setReshapeDimensions(Dims3{0, 0, -1});
permuted_data->getOutput(0)->setName("visual_features");
// Sequence modeling stage: two stacked BiLSTMs, each followed by a linear projection
size_t hidden_size = 512;
// NOTE: `times` (the sequence length, i.e. the CNN feature-map width) and the
// INPUT/OUTPUT blob-name constants are defined elsewhere in the file.
std::vector<Weights> weight_ih, weight_hh, bias_ih, bias_hh, weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse;
// 1st BiLSTM
IRNNv2Layer* bilstm1 = network->addRNNv2(*permuted_data->getOutput(0), 1, hidden_size, times, RNNOperation::kLSTM);
assert(bilstm1);
bilstm1->getOutput(0)->setName("BiLSTM1");
weight_ih.push_back(weightMap["SequenceModeling.0.rnn.weight_ih_l0"]);
weight_hh.push_back(weightMap["SequenceModeling.0.rnn.weight_hh_l0"]);
bias_ih.push_back(weightMap["SequenceModeling.0.rnn.bias_ih_l0"]);
bias_hh.push_back(weightMap["SequenceModeling.0.rnn.bias_hh_l0"]);
weight_ih_reverse.push_back(weightMap["SequenceModeling.0.rnn.weight_ih_l0_reverse"]);
weight_hh_reverse.push_back(weightMap["SequenceModeling.0.rnn.weight_hh_l0_reverse"]);
bias_ih_reverse.push_back(weightMap["SequenceModeling.0.rnn.bias_ih_l0_reverse"]);
bias_hh_reverse.push_back(weightMap["SequenceModeling.0.rnn.bias_hh_l0_reverse"]);
addBiLSTM(bilstm1, hidden_size, hidden_size, 1, weight_ih, weight_hh, bias_ih, bias_hh, weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse);
auto bilstm1_reshape = network->addShuffle(*bilstm1->getOutput(0));
assert(bilstm1_reshape);
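// The bidirectional output concatenates forward and backward states, so each
// step is 2 * hidden_size = 1024 wide; flatten to (-1, 1024, 1, 1) so the
// fully connected layer can project it back to 512.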
bilstm1_reshape->setReshapeDimensions(Dims4{-1, 1024, 1, 1});
auto bilstm1_linear = network->addFullyConnected(*bilstm1_reshape->getOutput(0), 512, weightMap["SequenceModeling.0.linear.weight"], weightMap["SequenceModeling.0.linear.bias"]);
assert(bilstm1_linear);
auto bilstm1_linear_reshape = network->addShuffle(*bilstm1_linear->getOutput(0));
assert(bilstm1_linear_reshape);
bilstm1_linear_reshape->setReshapeDimensions(Dims3{-1, (int)times, 512});
// 2nd BiLSTM
IRNNv2Layer* bilstm2 = network->addRNNv2(*bilstm1_linear_reshape->getOutput(0), 1, hidden_size, times, RNNOperation::kLSTM);
assert(bilstm2);
weight_ih.clear();
weight_ih.push_back(weightMap["SequenceModeling.1.rnn.weight_ih_l0"]);
weight_hh.clear();
weight_hh.push_back(weightMap["SequenceModeling.1.rnn.weight_hh_l0"]);
bias_ih.clear();
bias_ih.push_back(weightMap["SequenceModeling.1.rnn.bias_ih_l0"]);
bias_hh.clear();
bias_hh.push_back(weightMap["SequenceModeling.1.rnn.bias_hh_l0"]);
weight_ih_reverse.clear();
weight_ih_reverse.push_back(weightMap["SequenceModeling.1.rnn.weight_ih_l0_reverse"]);
weight_hh_reverse.clear();
weight_hh_reverse.push_back(weightMap["SequenceModeling.1.rnn.weight_hh_l0_reverse"]);
bias_ih_reverse.clear();
bias_ih_reverse.push_back(weightMap["SequenceModeling.1.rnn.bias_ih_l0_reverse"]);
bias_hh_reverse.clear();
bias_hh_reverse.push_back(weightMap["SequenceModeling.1.rnn.bias_hh_l0_reverse"]);
addBiLSTM(bilstm2, hidden_size, hidden_size, 1, weight_ih, weight_hh, bias_ih, bias_hh, weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse);
auto bilstm2_reshape = network->addShuffle(*bilstm2->getOutput(0));
assert(bilstm2_reshape);
bilstm2_reshape->setReshapeDimensions(Dims4{-1, 1024, 1, 1});
auto bilstm2_linear = network->addFullyConnected(*bilstm2_reshape->getOutput(0), 512, weightMap["SequenceModeling.1.linear.weight"], weightMap["SequenceModeling.1.linear.bias"]);
assert(bilstm2_linear);
auto bilstm2_linear_reshape = network->addShuffle(*bilstm2_linear->getOutput(0));
assert(bilstm2_linear_reshape);
bilstm2_linear_reshape->setReshapeDimensions(Dims4{-1, 512, 1, 1});
// Prediction Stage
auto prd_linear = network->addFullyConnected(*bilstm2_linear_reshape->getOutput(0), label_cnt, weightMap["Prediction.weight"], weightMap["Prediction.bias"]);
ISoftMaxLayer* prob = network->addSoftMax(*prd_linear->getOutput(0));
assert(prob);
auto permuted_output = network->addShuffle(*prob->getOutput(0));
permuted_output->setReshapeDimensions(Dims3{-1, (int)times, label_cnt});
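// TopK over the class axis (axes mask 1 << 2): output 0 is the max softmax
// probability per time step, output 1 the corresponding class index.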
auto prd = network->addTopK(*permuted_output->getOutput(0), nvinfer1::TopKOperation::kMAX, 1, 1 << 2);
assert(prd);
// output prob
prd->getOutput(0)->setName(OUTPUT_BLOB_PROB);
network->markOutput(*prd->getOutput(0));
// output index
prd->getOutput(1)->setName(OUTPUT_BLOB_INDEX);
prd->getOutput(1)->setType(DataType::kINT32);
network->markOutput(*prd->getOutput(1));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
std::cout << "===== maxBatchSize: " << maxBatchSize << std::endl;
// Maximum scratch memory the builder may use (~8 GB)
builder->setMaxWorkspaceSize(8000000000ULL);
ICudaEngine* engine = builder->buildCudaEngine(*network);
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem: weightMap)
{
free((void*) (mem.second.values));
}
return engine;
}
[/code]
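For context, loadWeights, getLayer, and addBiLSTM are helpers defined elsewhere in my file. Since the BiLSTM wiring is the part people usually ask about, here is a minimal sketch of what addBiLSTM does, assuming PyTorch's [input | forget | cell | output] gate layout and kFLOAT weights (a sketch, not the exact implementation):
[code]// Sketch of the addBiLSTM helper: marks the IRNNv2Layer bidirectional and
// copies PyTorch's concatenated 4*hidden_size LSTM weights into TensorRT's
// per-gate slots. Gate order assumed: input, forget, cell, output.
void addBiLSTM(IRNNv2Layer* lstm, int hidden_size, int input_size, int num_layers,
               const std::vector<Weights>& w_ih, const std::vector<Weights>& w_hh,
               const std::vector<Weights>& b_ih, const std::vector<Weights>& b_hh,
               const std::vector<Weights>& w_ih_r, const std::vector<Weights>& w_hh_r,
               const std::vector<Weights>& b_ih_r, const std::vector<Weights>& b_hh_r)
{
    lstm->setDirection(RNNDirection::kBIDIRECTION);
    const RNNGateType gates[4] = {RNNGateType::kINPUT, RNNGateType::kFORGET,
                                  RNNGateType::kCELL, RNNGateType::kOUTPUT};
    for (int l = 0; l < num_layers; ++l)
    {
        // A bidirectional layer occupies two pseudo-layer indices:
        // 2*l (forward) and 2*l + 1 (reverse).
        for (int dir = 0; dir < 2; ++dir)
        {
            const int idx = 2 * l + dir;
            const float* wih = (const float*) (dir == 0 ? w_ih[l] : w_ih_r[l]).values;
            const float* whh = (const float*) (dir == 0 ? w_hh[l] : w_hh_r[l]).values;
            const float* bih = (const float*) (dir == 0 ? b_ih[l] : b_ih_r[l]).values;
            const float* bhh = (const float*) (dir == 0 ? b_hh[l] : b_hh_r[l]).values;
            for (int g = 0; g < 4; ++g)
            {
                // Slice gate g out of PyTorch's concatenated 4*hidden_size blobs.
                Weights w{DataType::kFLOAT, wih + g * hidden_size * input_size, hidden_size * input_size};
                Weights r{DataType::kFLOAT, whh + g * hidden_size * hidden_size, hidden_size * hidden_size};
                Weights bw{DataType::kFLOAT, bih + g * hidden_size, hidden_size};
                Weights br{DataType::kFLOAT, bhh + g * hidden_size, hidden_size};
                lstm->setWeightsForGate(idx, gates[g], true, w);   // W: input weights
                lstm->setWeightsForGate(idx, gates[g], false, r);  // R: recurrent weights
                lstm->setBiasForGate(idx, gates[g], true, bw);     // input-side bias
                lstm->setBiasForGate(idx, gates[g], false, br);    // recurrent-side bias
            }
        }
    }
    // NOTE: for stacked LSTMs (num_layers > 1) the input size of layers above
    // the first would be 2 * hidden_size; both calls above use num_layers = 1.
}[/code]
TensorRT's IRNNv2Layer keeps separate input-side and recurrent-side biases (the isW flag of setBiasForGate), which matches PyTorch's bias_ih/bias_hh pair, so both are copied as-is.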