/* * Copyright 1993-2019 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * This source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * These Licensed Deliverables contained herein is PROPRIETARY and * CONFIDENTIAL to NVIDIA and is being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. 
Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ //! //! sampleSSD.cpp //! This file contains the implementation of the SSD sample. It creates the network using //! the SSD caffe model. //! It can be run with the following command line: //! Command: ./sample_ssd [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir] //! //! //! \brief Creates the network, configures the builder and creates the network engine //! //! \details This function creates the SSD network by parsing the caffe model and builds //! the engine that will be used to run SSD (mEngine) //! //! \return Returns true if the engine was created successfully and false otherwise //! 
#include "sampleSSD.h"

bool SampleSSD::build()
{
    initLibNvInferPlugins(&gLogger.getTRTLogger(), "");

    // Engine cache file: if it exists we deserialize it instead of rebuilding,
    // which skips the (slow) parse + optimize step on subsequent runs.
    const std::string engineName = std::string("ssd_Caffe_TRT_Engine") + ".buf";
    const bool engineExist = std::ifstream(engineName).good();

    // The runtime is needed on both paths (cache hit and cache miss) to turn a
    // serialized engine blob back into an ICudaEngine.
    IRuntime* runtime = createInferRuntime(gLogger.getTRTLogger());
    assert(runtime != nullptr);

    if (engineExist)
    {
        gLogInfo << "Deserializing cached engine from " << engineName << std::endl;
        std::string buffer = readBuffer(engineName);
        // BUG FIX: the original asserted mEngine != nullptr *before*
        // deserializing, which fails on every cache hit. Validate afterwards.
        mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
            runtime->deserializeCudaEngine(buffer.data(), buffer.size(), nullptr),
            samplesCommon::InferDeleter());
        runtime->destroy();
        if (!mEngine)
        {
            gLogInfo << "Failed to deserialize engine from " << engineName << std::endl;
            return false;
        }
        gLogInfo << "Successfully deserialized engine from disk" << std::endl;
        return true;
    }

    // No cached engine: build one from the caffe model, serialize it to disk,
    // then deserialize it into mEngine.
    auto builder
        = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));
    if (!builder)
    {
        runtime->destroy(); // BUG FIX: the original leaked the runtime on early returns
        return false;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
    if (!network)
    {
        runtime->destroy();
        return false;
    }

    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        runtime->destroy();
        return false;
    }

    auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
    if (!parser)
    {
        runtime->destroy();
        return false;
    }

    auto constructed = constructNetwork(builder, network, config, parser);
    if (!constructed)
    {
        runtime->destroy();
        return false;
    }

    assert(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    assert(mInputDims.nbDims == 3);

    // constructNetwork() left the serialized engine in trtModelStream; persist
    // it so the next run can take the fast deserialization path above.
    assert(trtModelStream != nullptr);
    writeBuffer(trtModelStream->data(), trtModelStream->size(), engineName);

    gLogInfo << "Deserializing freshly built engine" << std::endl;
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr),
        samplesCommon::InferDeleter());
    trtModelStream->destroy();
    runtime->destroy();

    if (!mEngine)
    {
        return false;
    }
    gLogInfo << "Successfully built engine and cached it to disk" << std::endl;
    return true;
}

//!
//! \brief Uses a caffe parser to create the SSD Network and marks the
//!        output layers
//!
//! \param network Pointer to the network that will be populated with the SSD network
//!
//! \param builder Pointer to the engine builder
//!
bool SampleSSD::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser)
{
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor
        = parser->parse(locateFile(mParams.prototxtFileName, mParams.dataDirs).c_str(),
            locateFile(mParams.weightsFileName, mParams.dataDirs).c_str(), *network, DataType::kFLOAT);

    for (auto& s : mParams.outputTensorNames)
    {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }

    builder->setMaxBatchSize(mParams.batchSize);
    config->setMaxWorkspaceSize(36_MiB);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }

    // Calibrator life time needs to last until after the engine is built.
    std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator;

    if (mParams.int8)
    {
        gLogInfo << "Using Entropy Calibrator 2" << std::endl;
        BatchStream calibrationStream(
            mParams.batchSize, mParams.nbCalBatches, mParams.calibrationBatches, mParams.dataDirs);
        calibrator.reset(
            new Int8EntropyCalibrator2(calibrationStream, 0, "SSD", mParams.inputTensorNames[0].c_str()));
        config->setFlag(BuilderFlag::kINT8);
        config->setInt8Calibrator(calibrator.get());
    }

    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);

    gLogInfo << "Begin building engine..." << std::endl;
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
    // BUG FIX: the original asserted the undefined identifier `engine` here.
    if (!mEngine)
    {
        return false;
    }
    gLogInfo << "End building engine..." << std::endl;

    // Serialize so build() can write the engine blob to disk for reuse.
    trtModelStream = mEngine->serialize();
    assert(trtModelStream != nullptr);

    // BUG FIX: the original called mEngine->destroy() and builder->destroy().
    // Both objects are already owned by smart pointers (InferDeleter /
    // SampleUniquePtr), so the explicit destroy() was a double free. Release
    // the engine through its deleter; the builder dies with its unique_ptr.
    mEngine.reset();
    return true;
}

//! \brief Writes `size` bytes starting at `buffer` to the file at `path` (used to cache the serialized engine).
void SampleSSD::writeBuffer(void* buffer, size_t size, std::string const& path)
{
    std::ofstream stream(path.c_str(), std::ios::binary);
    if (stream)
    {
        stream.write(static_cast<char*>(buffer), size);
    }
}

// Returns empty string iff can't read the file
std::string SampleSSD::readBuffer(std::string const& path)
{
    std::string buffer;
    std::ifstream stream(path.c_str(), std::ios::binary);
    if (stream)
    {
        // noskipws: binary payload — whitespace bytes must be preserved.
        stream >> std::noskipws;
        std::copy(std::istream_iterator<char>(stream), std::istream_iterator<char>(), std::back_inserter(buffer));
    }
    return buffer;
}

//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates the buffer,
//!          sets inputs and executes the engine.
//!
bool SampleSSD::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    // Read the input data into the managed buffers
    assert(mParams.inputTensorNames.size() == 1);
    if (!processInput(buffers))
    {
        return false;
    }

    // Memcpy from host input buffers to device input buffers
    buffers.copyInputToDevice();

    bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers.copyOutputToHost();

    // Post-process detections and verify results
    if (!verifyOutput(buffers))
    {
        return false;
    }

    return true;
}

//!
//! \brief Cleans up any state created in the sample class
//!
bool SampleSSD::teardown() { //! Clean up the libprotobuf files as the parsing is complete //! \note It is not safe to use any other part of the protocol buffers library after //! ShutdownProtobufLibrary() has been called. nvcaffeparser1::shutdownProtobufLibrary(); return true; } //! //! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer //! bool SampleSSD::processInput(const samplesCommon::BufferManager& buffers) { const int inputC = mInputDims.d[0]; const int inputH = mInputDims.d[1]; const int inputW = mInputDims.d[2]; const int batchSize = mParams.batchSize; // Available images std::vector imageList = {"dog.ppm"}; mPPMs.resize(batchSize); assert(mPPMs.size() <= imageList.size()); for (int i = 0; i < batchSize; ++i) { readPPMFile(locateFile(imageList[i], mParams.dataDirs), mPPMs[i]); } // Fill data buffer float* hostDataBuffer = static_cast(buffers.getHostBuffer("data")); float pixelMean[3]{104.0f, 117.0f, 123.0f}; // In BGR order // Host memory for input buffer for (int i = 0, volImg = inputC * inputH * inputW; i < mParams.batchSize; ++i) { for (int c = 0; c < inputC; ++c) { // The color image to input should be in BGR order for (unsigned j = 0, volChl = inputH * inputW; j < volChl; ++j) { gLogInfo << "*** not executed process inside 3" << std::endl; hostDataBuffer[i * volImg + c * volChl + j] = float(mPPMs[i].buffer[j * inputC + 2 - c]) - pixelMean[c]; } } } gLogInfo << "*** not executed process 3" << std::endl; return true; } //! //! \brief Filters output detections and verify result //! //! \return whether the detection output matches expectations //! 
bool SampleSSD::verifyOutput(const samplesCommon::BufferManager& buffers)
{
    const int inputH = mInputDims.d[1];
    const int inputW = mInputDims.d[2];
    const int batchSize = mParams.batchSize;
    const int keepTopK = mParams.keepTopK;
    const float visualThreshold = mParams.visualThreshold;
    const int outputClsSize = mParams.outputClsSize;

    const float* detectionOut = static_cast<const float*>(buffers.getHostBuffer("detection_out"));
    const int* keepCount = static_cast<const int*>(buffers.getHostBuffer("keep_count"));

    const std::vector<std::string> classes{"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
        "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
        "sofa", "train", "tvmonitor"}; // List of class labels

    bool pass = true;

    for (int p = 0; p < batchSize; ++p)
    {
        int numDetections = 0;
        // is there at least one correct detection?
        bool correctDetection = false;

        for (int i = 0; i < keepCount[p]; ++i)
        {
            // Each detection record is 7 floats; det[1] = class label,
            // det[2] = confidence, det[3..6] = normalized xmin/ymin/xmax/ymax.
            const float* det = detectionOut + (p * keepTopK + i) * 7;
            if (det[2] < visualThreshold)
            {
                continue;
            }

            const int label = static_cast<int>(det[1]);
            assert(label < outputClsSize);
            std::string storeName = classes[label] + "-" + std::to_string(det[2]) + ".ppm";

            numDetections++;
            if ((classes[label] == "car") || (classes[label] == "dog"))
            {
                correctDetection = true;
            }

            gLogInfo << " Image name:" << mPPMs[p].fileName.c_str() << ", Label: " << classes[label].c_str() << ","
                     << " confidence: " << det[2] * 100.f << " xmin: " << det[3] * inputW
                     << " ymin: " << det[4] * inputH << " xmax: " << det[5] * inputW
                     << " ymax: " << det[6] * inputH << std::endl;

            samplesCommon::writePPMFileWithBBox(
                storeName, mPPMs[p], {det[3] * inputW, det[4] * inputH, det[5] * inputW, det[6] * inputH});
        }

        // Every image must have at least one above-threshold detection, and at
        // least one of them must be "car" or "dog".
        pass &= numDetections >= 1;
        pass &= correctDetection;
    }

    return pass;
}