Im using TensorRT to accelerate a mnist caffe model, but the result is very bad 6%(even worse than guessing). I get to know TensorRT only for several days, I do need some help. The code is as following. I rewrite the official giexec sample and barely change anything. I think there maybe some problem with line 238~267, but I cant find the bugs.
#include <assert.h>
#include
#include
#include
#include
#include <sys/stat.h>
#include
#include <time.h>
#include <cuda_runtime_api.h>
#include
#include
#include <string.h>
#include
#include
#include <sys/time.h>
#include <unistd.h>
#include <opencv2/core/core.hpp>
#include<opencv2/highgui/highgui.hpp>
#include “NvInfer.h”
#include “NvCaffeParser.h”
using namespace nvinfer1;
using namespace nvcaffeparser1;
#define CHECK(status)
{
if (status != 0)
{
std::cout << "Cuda failure: " << status;
abort();
}
}
struct Params
{
std::string deployFile, modelFile, engine, calibrationCache;
std::vectorstd::string outputs;
int device{ 0 }, batchSize{ 1 }, workspaceSize{ 16 }, iterations{ 10 }, avgRuns{ 10 };
bool half2{ false }, int8{ false }, verbose{ false }, hostTime{ false };
} gParams;
static inline int volume(DimsCHW dims)
{
return dims.c()*dims.h()*dims.w();
}
std::vectorstd::string gInputs;
std::map<std::string, DimsCHW> gInputDimensions;
// Logger for GIE info/warning/errors
class Logger : public ILogger
{
void log(Severity severity, const char* msg) override
{
// suppress info-level messages
if (severity != Severity::kINFO || gParams.verbose)
std::cout << msg << std::endl;
}
} gLogger;
class RndInt8Calibrator : public IInt8EntropyCalibrator
{
public:
RndInt8Calibrator(int totalSamples = 1)
: mTotalSamples(totalSamples)
, mCurrentSample(0)
{
std::default_random_engine generator;
std::uniform_real_distribution distribution(-1.0F, 1.0F);
for(auto& elem: gInputDimensions)
{
int elemCount = volume(elem.second);
std::vector<float> rnd_data(elemCount);
for(auto& val: rnd_data)
val = distribution(generator);
void * data;
CHECK(cudaMalloc(&data, elemCount * sizeof(float)));
CHECK(cudaMemcpy(data, &rnd_data[0], elemCount * sizeof(float), cudaMemcpyHostToDevice));
mInputDeviceBuffers.insert(std::make_pair(elem.first, data));
}
}
~RndInt8Calibrator()
{
for(auto& elem: mInputDeviceBuffers)
CHECK(cudaFree(elem.second));
}
int getBatchSize() const override
{
return 1;
}
bool getBatch(void* bindings[], const char* names[], int nbBindings) override
{
if (mCurrentSample >= mTotalSamples)
return false;
for(int i = 0; i < nbBindings; ++i)
bindings[i] = mInputDeviceBuffers[names[i]];
++mCurrentSample;
return true;
}
const void* readCalibrationCache(size_t&) override
{
return nullptr;
}
virtual void writeCalibrationCache(const void*, size_t) override
{
}
private:
int mTotalSamples;
int mCurrentSample;
std::map<std::string, void*> mInputDeviceBuffers;
};
ICudaEngine* caffeToGIEModel()
{
// create the builder
IBuilder* builder = createInferBuilder(gLogger);
// parse the caffe model to populate the network, then set the outputs
INetworkDefinition* network = builder->createNetwork();
ICaffeParser* parser = createCaffeParser();
const IBlobNameToTensor* blobNameToTensor = parser->parse(gParams.deployFile.c_str(),
gParams.modelFile.c_str(),
*network,
gParams.half2 ? DataType::kHALF:DataType::kFLOAT);
if (!blobNameToTensor)
return nullptr;
for (int i = 0, n = network->getNbInputs(); i < n; i++)
{
DimsCHW dims = static_cast<DimsCHW&&>(network->getInput(i)->getDimensions());
gInputs.push_back(network->getInput(i)->getName());
gInputDimensions.insert(std::make_pair(network->getInput(i)->getName(), dims));
std::cout << "Input \"" << network->getInput(i)->getName() << "\": " << dims.c() << "x" << dims.h() << "x" << dims.w() << std::endl;
}
// specify which tensors are outputs
for (auto& s : gParams.outputs)
{
if (blobNameToTensor->find(s.c_str()) == nullptr)
{
std::cout << "could not find output blob " << s << std::endl;
return nullptr;
}
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
for (int i = 0, n = network->getNbOutputs(); i < n; i++)
{
DimsCHW dims = static_cast<DimsCHW&&>(network->getOutput(i)->getDimensions());
std::cout << "Output \"" << network->getOutput(i)->getName() << "\": " << dims.c() << "x" << dims.h() << "x" << dims.w() << std::endl;
}
// Build the engine
builder->setMaxBatchSize(gParams.batchSize);
builder->setMaxWorkspaceSize(gParams.workspaceSize<<20);
builder->setHalf2Mode(gParams.half2);
RndInt8Calibrator calibrator;
if (gParams.int8)
{
builder->setInt8Mode(true);
builder->setInt8Calibrator(&calibrator);
}
ICudaEngine* engine = builder->buildCudaEngine(*network);
if (engine == nullptr)
std::cout << "could not build engine" << std::endl;
parser->destroy();
network->destroy();
builder->destroy();
shutdownProtobufLibrary();
return engine;
}
void createMemory(const ICudaEngine& engine, std::vector<void*>& buffers, const std::string& name)
{
size_t bindingIndex = engine.getBindingIndex(name.c_str());
printf(“name=%s, bindingIndex=%d, buffers.size()=%d\n”, name.c_str(), (int)bindingIndex, (int)buffers.size());
assert(bindingIndex < buffers.size());
DimsCHW dimensions = static_cast<DimsCHW&&>(engine.getBindingDimensions((int)bindingIndex));
size_t eltCount = dimensions.c()*dimensions.h()*dimensions.w()*gParams.batchSize, memSize = eltCount * sizeof(float);
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr)
{
std::cerr << "Out of memory" << std::endl;
exit(1);
}
buffers[bindingIndex] = deviceMem;
}
void doInference(ICudaEngine& engine,std::string working_file)
{
IExecutionContext *context = engine.createExecutionContext();
// input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
// of these, but in this case we know that there is exactly one input and one output.
std::vector<void*> buffers(gInputs.size() + gParams.outputs.size());
for (size_t i = 0; i < gInputs.size(); i++)
createMemory(engine, buffers, gInputs[i]);
for (size_t i = 0; i < gParams.outputs.size(); i++)
createMemory(engine, buffers, gParams.outputs[i]);
FILE *fp=fopen(working_file.c_str(),"r");
fseek(fp,0L,SEEK_END);
long fend=ftell(fp);
fseek(fp,0L,SEEK_SET);
long fstart=ftell(fp);
char im_name[1024];
cv::Mat im,im_float;
size_t memSize=1*28*28*sizeof(float);
struct timeval t1,t2;
gettimeofday(&t1,NULL);
float prob[10];
FILE *fout=fopen("rc.txt","w");
cudaStream_t stream;
while(fstart!=fend)
{
fgets(im_name,1024,fp);
CHECK(cudaStreamCreate(&stream));
im_name[strlen(im_name)-1]='\0';
im=cv::imread(im_name);
im.convertTo(im_float,CV_32FC1);
for (int i=0;i<28*28;i++)
im_float.data[i]/=256.0;
CHECK(cudaMemcpy(buffers[0], im_float.data, memSize, cudaMemcpyHostToDevice));
context->enqueue(gParams.batchSize, &buffers[0], stream, nullptr);
CHECK(cudaMemcpyAsync(prob,buffers[1],10*sizeof(float),cudaMemcpyDeviceToHost));
cudaStreamSynchronize(stream);
int id=0;
float val=-1.0;
for (int i=0;i<10;i++)
{
if (prob[i]>val)
{
val=prob[i];
id=i;
}
}
fprintf(fout,"%d\n",id);
fstart=ftell(fp);
usleep(10);
cudaStreamDestroy(stream);
}
gettimeofday(&t2,NULL);
printf("%ld %ld\n",t2.tv_sec-t1.tv_sec,t2.tv_usec-t2.tv_usec);
fclose(fp);
fclose(fout);
}
static void printUsage()
{
printf(“\n”);
printf(“Mandatory params:\n”);
printf(" --deploy= Caffe deploy file\n");
printf(" --output= Output blob name (can be specified multiple times)\n");
printf("\nOptional params:\n");
printf(" --model=<file> Caffe model file (default = no model, random weights used)\n");
printf(" --batch=N Set batch size (default = %d)\n", gParams.batchSize);
printf(" --device=N Set cuda device to N (default = %d)\n", gParams.device);
printf(" --iterations=N Run N iterations (default = %d)\n", gParams.iterations);
printf(" --avgRuns=N Set avgRuns to N - perf is measured as an average of avgRuns (default=%d)\n", gParams.avgRuns);
printf(" --workspace=N Set workspace size in megabytes (default = %d)\n", gParams.workspaceSize);
printf(" --half2 Run in paired fp16 mode (default = false)\n");
printf(" --int8 Run in int8 mode (default = false)\n");
printf(" --verbose Use verbose logging (default = false)\n");
printf(" --hostTime Measure host time rather than GPU time (default = false)\n");
printf(" --engine=<file> Generate a serialized GIE engine\n");
printf(" --calib=<file> Read INT8 calibration cache file\n");
fflush(stdout);
}
static ICudaEngine* createEngine()
{
ICudaEngine *engine;
if (!gParams.deployFile.empty()) {
engine = caffeToGIEModel();
if (!engine)
{
std::cerr << "Engine could not be created" << std::endl;
return nullptr;
}
if (!gParams.engine.empty())
{
std::ofstream p(gParams.engine);
if (!p)
{
std::cerr << "could not open plan output file" << std::endl;
return nullptr;
}
IHostMemory *ptr = engine->serialize();
assert(ptr);
p.write(reinterpret_cast<const char*>(ptr->data()), ptr->size());
ptr->destroy();
}
return engine;
}
// load directly from serialized engine file if deploy not specified
if (!gParams.engine.empty()) {
char *gieModelStream{nullptr};
size_t size{0};
std::ifstream file(gParams.engine, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
gieModelStream = new char;
assert(gieModelStream);
file.read(gieModelStream, size);
file.close();
}
IRuntime* infer = createInferRuntime(gLogger);
engine = infer->deserializeCudaEngine(gieModelStream, size, nullptr);
if (gieModelStream) delete [] gieModelStream;
// assume input to be "data" for deserialized engine
gInputs.push_back("data");
return engine;
}
// complain about empty deploy file
std::cerr << "Deploy file not specified" << std::endl;
return nullptr;
}
int main(int argc, char** argv)
{
// create a GIE model from the caffe model and serialize it to a stream
gParams.deployFile="model/test.prototxt";
gParams.modelFile="model/mnist.caffemodel";
gParams.calibrationCache="";
gParams.engine="";
gParams.outputs.push_back("fc2");
gParams.device=0;
gParams.batchSize=1;
gParams.workspaceSize=16;
std::string working_file="test.txt";
cudaSetDevice(gParams.device);
if (gParams.outputs.size() == 0)
{
std::cerr << "At least one network output must be defined" << std::endl;
return -1;
}
ICudaEngine* engine = createEngine();
if (!engine)
{
std::cerr << "Engine could not be created" << std::endl;
return -1;
}
doInference(*engine,working_file);
engine->destroy();
return 0;
}