// // Created by lucky on 19-1-19. // #include #include #include #include #include #include #include #include #include #include #include //#include #include #include #include "NvInfer.h" #include "NvCaffeParser.h" #include "common.h" using namespace nvinfer1; using namespace nvcaffeparser1; //using namespace cv; using namespace std; // stuff we know about the network and the caffe input/output blobs static const int INPUT_H = 112; static const int INPUT_W = 96; static const int OUTPUT_SIZE = 512; static Logger gLogger; const char* INPUT_BLOB_NAME = "data"; const char* OUTPUT_BLOB_NAME = "fc5"; const std::vector directories{ "data/samples/mnist/", "data/mnist/" }; std::string locateFile(const std::string& input) { return locateFile(input, directories); } // simple PGM (portable greyscale map) reader void readPGMFile(const std::string& fileName, uint8_t buffer[INPUT_H*INPUT_W]) { readPGMFile(fileName, buffer, INPUT_H, INPUT_W); } void caffeToGIEModel(const std::string& deployFile, // name for caffe prototxt const std::string& modelFile, // name for model const std::vector& outputs, // network outputs unsigned int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with) IHostMemory *&gieModelStream) // output buffer for the GIE model { // create the builder IBuilder* builder = createInferBuilder(gLogger); // parse the caffe model to populate the network, then set the outputs INetworkDefinition* network = builder->createNetwork(); ICaffeParser* parser = createCaffeParser(); const IBlobNameToTensor* blobNameToTensor = parser->parse(locateFile(deployFile, directories).c_str(), locateFile(modelFile, directories).c_str(), *network, DataType::kFLOAT); // specify which tensors are outputs for (auto& s : outputs) network->markOutput(*blobNameToTensor->find(s.c_str())); // Build the engine builder->setMaxBatchSize(maxBatchSize); builder->setMaxWorkspaceSize(1 << 20); ICudaEngine* engine = builder->buildCudaEngine(*network); assert(engine); // we don't need the network any more, and we can destroy the parser network->destroy(); parser->destroy(); // serialize the engine, then close everything down gieModelStream = engine->serialize(); engine->destroy(); builder->destroy(); shutdownProtobufLibrary(); } void doInference(IExecutionContext& context, float* input, float* output, int batchSize) { const ICudaEngine& engine = context.getEngine(); // input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(), // of these, but in this case we know that there is exactly one input and one output. assert(engine.getNbBindings() == 2); void* buffers[2]; // In order to bind the buffers, we need to know the names of the input and output tensors. // note that indices are guaranteed to be less than IEngine::getNbBindings() int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME), outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME); // create GPU buffers and a stream CHECK(cudaMalloc(&buffers[inputIndex], batchSize * INPUT_H * INPUT_W * sizeof(float))); CHECK(cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float))); cudaStream_t stream; CHECK(cudaStreamCreate(&stream)); // DMA the input to the GPU, execute the batch asynchronously, and DMA it back: CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream)); context.enqueue(batchSize, buffers, stream, nullptr); CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE*sizeof(float), cudaMemcpyDeviceToHost, stream)); cudaStreamSynchronize(stream); // release the stream and the buffers cudaStreamDestroy(stream); CHECK(cudaFree(buffers[inputIndex])); CHECK(cudaFree(buffers[outputIndex])); } int main(int argc, char** argv) { // create a GIE model from the caffe model and serialize it to a stream IHostMemory *gieModelStream{nullptr}; caffeToGIEModel("sphereface.prototxt", "sphereface_model.caffemodel", std::vector < std::string > { OUTPUT_BLOB_NAME }, 1, gieModelStream); // read a random digit file // srand(unsigned(time(nullptr))); // uint8_t fileData[INPUT_H*INPUT_W]; // int num = rand() % 10; // readPGMFile(locateFile(std::to_string(num) + ".pgm", directories), fileData); // // // print an ascii representation // std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl; // for (int i = 0; i < INPUT_H*INPUT_W; i++) // std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n"); // parse the mean file and subtract it from the image // ICaffeParser* parser = createCaffeParser(); // IBinaryProtoBlob* meanBlob = parser->parseBinaryProto(locateFile("mnist_mean.binaryproto", directories).c_str()); // parser->destroy(); // const float *meanData = reinterpret_cast(meanBlob->getData()); // // float data[INPUT_H*INPUT_W]; // for (int i = 0; i < INPUT_H*INPUT_W; i++) // data[i] = float(fileData[i])-meanData[i]; // meanBlob->destroy(); // LOG(INFO)<<"face_recognizer->extract_feature_or_identify"; // deserialize the engine IRuntime* runtime = createInferRuntime(gLogger); ICudaEngine* engine = runtime->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), nullptr); if (gieModelStream) gieModelStream->destroy(); IExecutionContext *context = engine->createExecutionContext(); cv::Mat image = cv::imread("/home/lucky/Downloads/TensorRT-3.0.4/data/mnist/aligned_face_0.jpg");//n*c*h*w:1*3*112*96 image.convertTo(image, CV_32FC3, 1.0/128, -127.5/128); std::vector channels(3); cv::split(image, channels); std::vector input_data; for (auto &c : channels) { input_data.insert(input_data.end(), (float *)c.datastart, (float *)c.dataend); } // run inference float prob[OUTPUT_SIZE]; doInference(*context, input_data.data(), prob,3); std::cout<destroy(); engine->destroy(); runtime->destroy(); return 0; // print a histogram of the output distribution // std::cout << "\n\n"; // float val{0.0f}; // int idx{0}; // for (unsigned int i = 0; i < 10; i++) // { // val = std::max(val, prob[i]); // if (val == prob[i]) idx = i; // std::cout << i << ": " << std::string(int(std::floor(prob[i] * 10 + 0.5f)), '*') << "\n"; // } // std::cout << std::endl; // // return (idx == num && val > 0.9f) ? EXIT_SUCCESS : EXIT_FAILURE; }