#include #include "NvInferPlugin.h" #include "NvCaffeParser.h" #include "NvUffParser.h" #include "common.h" #include #include #include #include #include #include "Utils.h" #include #include "logging.h" #include "logger.h" nvinfer1::IExecutionContext* mTrtContext_uff; nvinfer1::ICudaEngine* mTrtEngine_uff; nvinfer1::IRuntime* mTrtRunTime_uff; cudaStream_t mTrtCudaStream_uff; std::vector mTrtCudaBuffer_uff; std::vector mTrtBindBufferSize_uff; int mTrtInputCount_uff; int mTrtIterationTime_uff; int mTrtBatchSize_uff; inline void* safeCudaMalloc(size_t memSize) { void* deviceMem; //CUDA_CHECK(cudaMalloc(&deviceMem, memSize)); cudaMalloc(&deviceMem, memSize); if (deviceMem == nullptr) { std::cerr << "Out of memory" << std::endl; exit(1); } return deviceMem; } inline int64_t volume(const nvinfer1::Dims& d) { return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); } inline unsigned int getElementSize(nvinfer1::DataType t) { switch (t) { case nvinfer1::DataType::kINT32: return 4; case nvinfer1::DataType::kFLOAT: return 4; case nvinfer1::DataType::kHALF: return 2; case nvinfer1::DataType::kINT8: return 1; } throw std::runtime_error("Invalid DataType."); return 0; } inline size_t getOutputSize_uff() { return accumulate(mTrtBindBufferSize_uff.begin() + mTrtInputCount_uff, mTrtBindBufferSize_uff.end(), 0); }; void InitEngine_uff() { //const int maxBatchSize = 1; mTrtBatchSize_uff = mTrtEngine_uff->getMaxBatchSize(); std::cout << "MaxBatchSize: " << mTrtBatchSize_uff << std::endl; mTrtContext_uff = mTrtEngine_uff->createExecutionContext(); assert(mTrtContext_uff != nullptr); //mTrtContext_uff->setProfiler(&mTrtProfiler_uff); // Input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings() int nbBindings = mTrtEngine_uff->getNbBindings(); mTrtCudaBuffer_uff.resize(nbBindings); mTrtBindBufferSize_uff.resize(nbBindings); for (int i = 0; i < nbBindings; ++i) { Dims dims = mTrtEngine_uff->getBindingDimensions(i); DataType dtype = mTrtEngine_uff->getBindingDataType(i); int64_t totalSize = volume(dims) * mTrtBatchSize_uff * getElementSize(dtype); mTrtBindBufferSize_uff[i] = totalSize; mTrtCudaBuffer_uff[i] = safeCudaMalloc(totalSize); if (mTrtEngine_uff->bindingIsInput(i)) mTrtInputCount_uff++; else { std::cout << "binding Index: " << i << std::endl; std::cout << "mTrtBindBufferSize_uff: " << mTrtBindBufferSize_uff[i] << std::endl; } } //CUDA_CHECK(cudaStreamCreate(&mTrtCudaStream_uff)); cudaStreamCreate(&mTrtCudaStream_uff); } nvinfer1::ICudaEngine* loadModelAndCreateEngine_uff(const char* modelFile, int maxBatchSize, nvuffparser::IUffParser* parser, nvinfer1::IHostMemory*& trtModelStream) { std::cout << "loadModelAndCreateEngine_uff" << std::endl; // Create the builder IBuilder* builder = createInferBuilder(gLogger); // Parse the model to populate the network, then set the outputs. INetworkDefinition* network = builder->createNetwork(); std::cout << "Begin parsing model..." << std::endl; if (!parser->parse(modelFile, *network, nvinfer1::DataType::kFLOAT)) std::cout << "fail to parse" << std::endl; std::cout << "End parsing model..." << std::endl; // Build the engine. builder->setMaxBatchSize(maxBatchSize); std::cout << "loadModelAndCreateEngine_uff 2" << std::endl; builder->setMaxWorkspaceSize(2 << 10);// 1G std::cout << "Begin building engine..." << std::endl; ICudaEngine* engine = builder->buildCudaEngine(*network); if (!engine) std::cout << "Unable to create engine" << std::endl; std::cout << "End building engine..." << std::endl; // We don't need the network any more, and we can destroy the parser. network->destroy(); parser->destroy(); // Serialize the engine, then close everything down. trtModelStream = engine->serialize(); builder->destroy(); //shutdownProtobufLibrary(); return engine; } void saveEngine_uff(std::string fileName) { if (mTrtEngine_uff) { nvinfer1::IHostMemory* data = mTrtEngine_uff->serialize(); std::ofstream file; file.open(fileName, std::ios::binary | std::ios::out); if (!file.is_open()) { std::cout << "read create engine file" << fileName << " failed" << std::endl; return; } file.write((const char*)data->data(), data->size()); file.close(); } } int main(int argc, char* argv[]) { std::map params; //string configFile = "PABody20"; std::string configFile = "PAHead20"; std::string precision = "fp32"; std::cout << "configFile: " << configFile << std::endl; std::cout << "precision: " << precision << std::endl; // Read the UFF config.... std::string ufffile = "PA_Head20.uff"; std::string inputName = "input_3"; std::vector outputName; outputName.push_back("gender_output/Softmax"); outputName.push_back("eyewear_output/Softmax"); outputName.push_back("hairlength_output/Softmax"); outputName.push_back("facehair_output/Softmax"); outputName.push_back("headwear_output/Softmax"); outputName.push_back("haircolor_output/Softmax"); outputName.push_back("mask_output/Softmax"); outputName.push_back("age_output/Softmax"); int _number_of_outputs; int _channel = 3; double _bias = 0; double _norm = 1; int _ClassifierImageHeight = 96; int _ClassifierImageWidth = 96; std::string _imgType = "RGB"; //string _yoloLabel; // yolo label that gives bounding box... int _number_of_classes; int _ClassifierBatchSize = 1; std::vector> calibratorData; std::string calibFileList = ""; std::string modelName = ufffile; modelName.erase(modelName.end() - 4, modelName.end()); std::cout << "modelName: " << modelName << std::endl; std::string saveName; saveName = modelName + "_" + precision + "batch_" + std::to_string(_ClassifierBatchSize) + ".engine"; bool fileExists = false; std::ifstream infile(saveName); if (infile.good()) fileExists = true; //Tn::trtNet_uff* net; if (fileExists) { //net = new Tn::trtNet_uff(saveName); std::fstream file; file.open(saveName, std::ios::binary | std::ios::in); if (!file.is_open()) { std::cout << "read engine file" << saveName << " failed" << std::endl; //return; } file.seekg(0, std::ios::end); int length = file.tellg(); file.seekg(0, std::ios::beg); std::unique_ptr data(new char[length]); file.read(data.get(), length); file.close(); std::cout << "*** deserializing" << std::endl; mTrtRunTime_uff = createInferRuntime(gLogger); assert(mTrtRunTime_uff != nullptr); //mTrtEngine_uff = mTrtRunTime_uff->deserializeCudaEngine(data.get(), length/*, &mTrtPluginFactory_uff*/); // TensorRT 7.0.0.11 mTrtEngine_uff = mTrtRunTime_uff->deserializeCudaEngine(data.get(), length, NULL); // TensorRT 5.1.5 assert(mTrtEngine_uff != nullptr); InitEngine_uff(); } else { std::cout << "init plugin uff: " << ufffile << std::endl; auto parser = nvuffparser::createUffParser(); parser->registerInput(inputName.c_str(), DimsCHW(_channel, _ClassifierImageHeight, _ClassifierImageWidth), nvuffparser::UffInputOrder::kNCHW); for (auto& name : outputName) { parser->registerOutput(name.c_str()); } //const int maxBatchSize = 2; IHostMemory* trtModelStream{ nullptr }; //nvinfer1::Int8EntropyCalibrator * calibrator = nullptr; //nvinfer1::IInt8Calibrator* calibrator = nullptr; //PluginFactory_uff pluginFactorySerialize_uff; nvinfer1::ICudaEngine* tmpEngine = loadModelAndCreateEngine_uff(ufffile.c_str(), _ClassifierBatchSize, parser, trtModelStream); assert(tmpEngine != nullptr); assert(trtModelStream != nullptr); tmpEngine->destroy(); //pluginFactorySerialize_uff.destroyPlugin(); mTrtRunTime_uff = createInferRuntime(gLogger); assert(mTrtRunTime_uff != nullptr); //mTrtEngine_uff = mTrtRunTime_uff->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size()/*, &mTrtPluginFactory_uff*/); // TensorRT 7.0.0.11 mTrtEngine_uff = mTrtRunTime_uff->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), NULL); // TensorRT 5.1.5 assert(mTrtEngine_uff != nullptr); // Deserialize the engine. trtModelStream->destroy(); std::cout << "save Engine..." << saveName << std::endl; saveEngine_uff(saveName); InitEngine_uff(); } system("pause"); }