Inputs assertion failed in SSDConcatPlugin (libnvinfer_plugin.so.4.0.4)

Hi, I am trying to get SSD to work with TensorRT and I encountered some difficulty. I tried searching the forums and google but I couldn’t seem to find the right solution.

This is the code I used to create the engine:

nvinfer1::ICudaEngine * caffeToGIE(const std::string& deployFile,
		const std::string& modelFile,	
		const std::vector<std::string>& outputs,	
		unsigned int maxBatchSize,
		nvcaffeparser1::IPluginFactory* pluginFactory)
		{
	// Parse a Caffe deploy prototxt + caffemodel pair and build a TensorRT engine.
	//   deployFile:    path to the .prototxt network description
	//   modelFile:     path to the .caffemodel trained weights
	//   outputs:       blob names to mark as network outputs
	//   maxBatchSize:  largest batch size the engine must support
	//   pluginFactory: optional factory for layers the parser cannot handle natively
	// Returns the built engine (caller owns it and must destroy() it),
	// or nullptr if parsing fails.
	Logger gLogger;
	nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);

	nvinfer1::INetworkDefinition* network = builder->createNetwork();
	nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
	if (pluginFactory) {
		parser->setPluginFactory(pluginFactory);
	}
	std::cout << "Begin parsing model..." << std::endl;
	const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
			deployFile.c_str(), modelFile.c_str(), *network,
			nvinfer1::DataType::kFLOAT);
	std::cout << "End parsing model..." << std::endl;
	// BUG FIX: the original dereferenced the parse result unconditionally; a bad
	// prototxt/model path would crash instead of failing cleanly.
	if (!blobNameToTensor) {
		std::cerr << "Failed to parse " << deployFile << " / " << modelFile << std::endl;
		parser->destroy();
		network->destroy();
		builder->destroy();
		return nullptr;
	}
	for (auto& s : outputs) {
		// BUG FIX: find() returns nullptr for an unknown blob name; the original
		// dereferenced it blindly.
		nvinfer1::ITensor* tensor = blobNameToTensor->find(s.c_str());
		if (!tensor) {
			std::cerr << "Output blob not found: " << s << std::endl;
			continue;
		}
		network->markOutput(*tensor);
	}

	builder->setMaxBatchSize(maxBatchSize);
	builder->setMaxWorkspaceSize(10 << 21);	// ~20 MiB scratch space for layer algorithms

	std::cout << "Begin building engine..." << std::endl;
	nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
	assert(engine);
	std::cout << "End building engine..." << std::endl;

	// The engine is self-contained once built; the construction objects can go.
	network->destroy();
	parser->destroy();
	builder->destroy();
	nvcaffeparser1::shutdownProtobufLibrary();

	return engine;
}

The code executes up to the following line before throwing the following error:

nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

NvPluginSSD.cu:730: virtual nvinfer1::Dims nvinfer1::plugin::Concat::getOutputDimensions(int, const nvinfer1::Dims*, int): Assertion `inputs[i].d[1]==inputs[0].d[1]’ failed.

This is the code segment used to initialize the concat plugin inside the plugin factory implementation:

// RAII alias: the INvPlugin is released through nvPluginDeleter when the
// owning unique_ptr (testPlugin, declared in the enclosing factory) dies.
typedef std::unique_ptr<nvinfer1::plugin::INvPlugin, decltype(nvPluginDeleter)> NvPluginType;
// "mbox_loc" is SSD's location-prediction concat layer.  The first argument
// to createConcatPlugin is presumably the concatenation axis (1 == channel)
// and the second whether to ignore the batch dimension — TODO confirm against
// the NvInferPlugin header.  NOTE(review): the failing assertion
// (inputs[i].d[1] == inputs[0].d[1]) means the non-concat dimensions of the
// inputs disagree, so the axis choice or the upstream permute/flatten layers
// need checking, not this wrapper.
if (!strcmp(layerName, "mbox_loc")) {
	testPlugin = NvPluginType(nvinfer1::plugin::createConcatPlugin(1, false), nvPluginDeleter);
	return testPlugin.get();
}

I wrote a dummy plugin to see what the tensor dimensions were and this is the output for “mbox_loc”:

Begin building engine…
nbdims=3
Channel,Spatial,Spatial,
38,38,16,
nbdims=3
Channel,Spatial,Spatial,
19,19,24,
nbdims=3
Channel,Spatial,Spatial,
10,10,24,
nbdims=3
Channel,Spatial,Spatial,
5,5,24,
nbdims=3
Channel,Spatial,Spatial,
3,3,16,
nbdims=3
Channel,Spatial,Spatial,
1,1,16,

Comparing this to the Caffe output for the bottom layers,
I noticed that the number of elements appears to be off by a factor of 8:
https://drive.google.com/file/d/1jzhCYBKhLAirJVz8yzM6TXuY9INuTSoH/view?usp=sharing

In debug mode, I noticed there is an additional 4th value of 10 in all the input dims:
https://drive.google.com/file/d/1dsN1GwNjH5buf0jtrDFgW0VhJakMm0pf/view?usp=sharing

This is the dummy plugin code if it helps:

#ifndef TRT_DUMMY_PLUGIN_H_
#define TRT_DUMMY_PLUGIN_H_

#include <NvInfer.h>
#include <iostream>

using namespace nvinfer1;
// Pass-through debug plugin: prints the dimension count, dimension types, and
// dimension values of every input blob at network-build time, so you can see
// exactly what TensorRT feeds a layer (e.g. the inputs of an SSD concat).
class Dummy: public IPlugin {
public:
	Dummy(){}
	// Deserialization constructor; the plugin carries no state, so the
	// buffer contents are ignored.
	Dummy(const void* buffer, size_t size){}
	int getNbOutputs() const override { return 1; }
	Dims getOutputDimensions(int index, const Dims* inputs,
			int nbInputBlobs)
			override {
		for (int i = 0; i < nbInputBlobs; ++i) {
			const Dims& cur = inputs[i];	// reference: no need to copy the struct
			std::cout << "nbdims="<<cur.nbDims<< std::endl;
			// First line: the semantic type of each dimension.
			for (int k = 0; k < cur.nbDims; ++k) {
				switch (cur.type[k]) {
				case nvinfer1::DimensionType::kCHANNEL: {
					std::cout << "Channel" << ",";
					break;
				}
				case nvinfer1::DimensionType::kINDEX: {
					std::cout << "Index" << ",";
					break;
				}
				case nvinfer1::DimensionType::kSEQUENCE: {
					std::cout << "Seq" << ",";
					break;
				}
				case nvinfer1::DimensionType::kSPATIAL: {
					std::cout << "Spatial" << ",";
					break;
				}
				}
			}
			std::cout << std::endl;
			// Second line: the extent of each dimension.
			for (int k = 0; k < cur.nbDims; ++k) {
				std::cout << cur.d[k] << ",";
			}
			std::cout << std::endl;
		}
		// BUG FIX: the original returned a value-initialized Dims (nbDims == 0),
		// giving this layer a zero-dimensional output that can break any layer
		// downstream.  Echo the first input's dimensions so the plugin behaves
		// as a shape-preserving pass-through.
		return nbInputBlobs > 0 ? inputs[0] : Dims();
	}
	int initialize() override { return 0; }
	void terminate() override {}
	size_t getWorkspaceSize(int) const override { return 0; }
	// NOTE(review): enqueue does not copy input data to the output buffer, so
	// the output contents are undefined at inference time — acceptable for a
	// build-time probe, not for a real pass-through at runtime.
	int enqueue(int batchSize, const void* const *inputs, void** outputs, void*,
			cudaStream_t stream) override { return 0; }
	size_t getSerializationSize() override { return 0; }	// stateless: nothing to serialize
	void serialize(void* buffer) override { }
	void configure(const Dims*inputs, int nbInputs,
			const Dims* outputs, int nbOutputs, int maxBatchSize)
					override {}
protected:
};
#endif /* TRT_DUMMY_PLUGIN_H_ */

I’m quite new to Caffe and TensorRT, so I’m confused about why the dimensions appear in a different order in Caffe versus TensorRT. Also, why is the value 10 in TensorRT but 8 in Caffe?
Finally, how can I solve the assertion error mentioned in the title?

SSD: https://github.com/weiliu89/caffe/tree/ssd
Original prototxt: https://drive.google.com/file/d/14iCZftdTlvV39UM7PpZjS4ipwA3K3wDV/view?usp=sharing
TensorRT prototxt: https://drive.google.com/file/d/15mdbIXgRBQ6-zMQtwj7DcLOJv-NBOfdu/view?usp=sharing
Layer dimensions output by caffe: https://drive.google.com/file/d/1u8SXJ5kcRUrGhjUdEnIn1LWCEzdKbFRG/view?usp=sharing