#include "NvInfer.h" #include "NvInferPlugin.h" #include #include "cuda_runtime_api.h" #include "logging.h" #include "utils.hpp" #include #include #include #include #include #include #define DEVICE 0 // GPU id #define NMS_THRESH 0.4 #define CONF_THRESH 0.5 #define BATCH_SIZE 1 #define CHECK(status) \ do\ {\ auto ret = (status);\ if (ret != 0)\ {\ std::cerr << "Cuda failure: " << ret << std::endl;\ abort();\ }\ } while (0) // stuff we know about the network and the input/output blobs static const int INPUT_H = Yolo::INPUT_H; static const int INPUT_W = Yolo::INPUT_W; static const int CLASS_NUM = Yolo::CLASS_NUM; static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1 const char* INPUT_BLOB_NAME = "images"; const char* OUTPUT_BLOB_NAME = "output"; using namespace nvinfer1; static sample::Logger gLogger{sample::Logger::Severity::kINFO}; struct Yolov5TRTParams { /* data */ int32_t batchSize{1}; // Number of inputs in a batch const char* inputTensorName = "data"; const char* outputTensorName = "prob"; std::string trtEngineFile; // trt engine file name }; class YoloV5TensorRT{ public: YoloV5TensorRT(const Yolov5TRTParams params) : mParams(params) , mEngine(nullptr) , mContext(nullptr) , mRuntime(nullptr) { } // Runs the Tensorrt network inference engine on a sample. bool deserialize(); int getInputIndex(); int getOutputIndex(); bool destroy(); void doInference(cudaStream_t& stream, void **buffers, float* input, float* output, int batchSize); private: Yolov5TRTParams mParams; // The parameters for densenet121. ICudaEngine* mEngine; // The tensorrt engine used to run the network. IExecutionContext* mContext; // The TensorRT execution context to run inference. IRuntime* mRuntime; }; /** * Uses the serialized engine file and reads * data into a stream to deserialize the cuda * engine from the stream. **/ bool YoloV5TensorRT::deserialize(){ if (mContext != nullptr && mEngine != nullptr) { return true; } if (mEngine == nullptr) { char* trtModelStream{nullptr}; size_t size{0}; // open file std::ifstream f(mParams.trtEngineFile, std::ios::binary); if (f.good()) { // get size f.seekg(0, f.end); size = f.tellg(); f.seekg(0, f.beg); trtModelStream = new char[size]; // read data as a block f.read(trtModelStream, size); f.close(); } if (trtModelStream == nullptr) { return false; } // deserialize mRuntime = createInferRuntime(gLogger); assert(mRuntime); mEngine = mRuntime -> deserializeCudaEngine(trtModelStream, size, 0); assert(mEngine != nullptr); delete[] trtModelStream; } std::cout << "deserialized engine successfully." 
              << std::endl;
    // assert(mEngine->getNbBindings() == 2);
    // cout << "Num bindings: " << mEngine->getNbBindings() << endl;
    // create execution context
    mContext = mEngine->createExecutionContext();
    assert(mContext != nullptr);
    return true;
}

int YoloV5TensorRT::getInputIndex() {
    return mEngine->getBindingIndex(INPUT_BLOB_NAME);
}

int YoloV5TensorRT::getOutputIndex() {
    return mEngine->getBindingIndex(OUTPUT_BLOB_NAME);
}

bool YoloV5TensorRT::destroy() {
    // Destroy the execution context, engine and runtime
    mContext->destroy();
    mEngine->destroy();
    mRuntime->destroy();
    return true;
}

void YoloV5TensorRT::doInference(cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    mContext->enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}

/**
 * Initializes the params in the Yolov5TRTParams structure.
 **/
Yolov5TRTParams initializeParams() {
    Yolov5TRTParams params;
    params.batchSize = 1;
    // change engine file name here
    params.trtEngineFile = "../yolov5s.engine";
    return params;
}

int main(int argc, char** argv) {
    // bool p = initLibNvInferPlugins(&gLogger, "");
    cudaSetDevice(DEVICE);

    std::string img_dir;
    if (argc != 3) {
        std::cerr << "Invalid arguments, please check." << std::endl;
        std::cerr << "./yolov5 -d ../samples  // deserialize engine and run inference" << std::endl;
        return -1;
    }
    img_dir = std::string(argv[2]);

    Yolov5TRTParams params = initializeParams();
    YoloV5TensorRT yolov5trt(params);

    // check if the engine file exists already
    std::ifstream f(params.trtEngineFile, std::ios::binary);
    // if the engine file does not exist, there is nothing to deserialize
    if (!f.good()) {
        std::cerr << "TensorRT engine file not found ..." << std::endl;
        return -1;
    } else {
        // deserialize
        std::cout << "engine already exists ..." << std::endl;
        yolov5trt.deserialize();
    }

    // get images
    std::vector<std::string> file_names;
    if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
        std::cerr << "read_files_in_dir failed." << std::endl;
        return -1;
    }

    // prepare input data
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    void* buffers[2];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    // const int inputIndex = yolov5trt.getInputIndex();
    // const int outputIndex = yolov5trt.getOutputIndex();
    const int inputIndex = 0;
    const int outputIndex = 1;
    std::cout << std::to_string(inputIndex) << std::endl;
    assert(inputIndex == 0);
    std::cout << std::to_string(outputIndex) << std::endl;
    assert(outputIndex == 1);

    // Create GPU buffers on device
    CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // run inference here
    int fcount = 0;
    for (int f = 0; f < (int)file_names.size(); f++) {
        fcount++;
        if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
        for (int b = 0; b < fcount; b++) {
            cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            if (img.empty()) continue;
            cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
            int i = 0;
            for (int row = 0; row < INPUT_H; ++row) {
                uchar* uc_pixel = pr_img.data + row * pr_img.step;
                for (int col = 0; col < INPUT_W; ++col) {
                    data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
                    uc_pixel += 3;
                    ++i;
                }
            }
        }

        // Run inference
        auto start = std::chrono::system_clock::now();
        yolov5trt.doInference(stream, buffers, data, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        // Apply NMS per image in the batch
        std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
        }

        // Draw detections and save annotated images
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            //std::cout << res.size() << std::endl;
            cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            for (size_t j = 0; j < res.size(); j++) {
                cv::Rect r = get_rect(img, res[j].bbox);
                cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
                cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
            }
            cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
        }
        fcount = 0;
    }

    // Release stream and buffers
    cudaStreamDestroy(stream);
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));

    // Destroy the engine
    yolov5trt.destroy();

    return 0;
}