#include "onnxInference.hpp" bool OnnxInference::build() { auto builder = SampleUniquePtr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); if (!builder) { return false; } const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); auto network = SampleUniquePtr(builder->createNetworkV2(explicitBatch)); if (!network) { return false; } auto config = SampleUniquePtr(builder->createBuilderConfig()); if (!config) { return false; } auto parser = SampleUniquePtr(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger())); if (!parser) { return false; } auto constructed = constructNetwork(builder, network, config, parser); if (!constructed) { return false; } // CUDA stream used for profiling by the builder. auto profileStream = samplesCommon::makeCudaStream(); if (!profileStream) { return false; } config->setProfileStream(*profileStream); SampleUniquePtr plan{ builder->buildSerializedNetwork(*network, *config) }; if (!plan) { return false; } SampleUniquePtr runtime{ createInferRuntime(sample::gLogger.getTRTLogger()) }; if (!runtime) { return false; } mEngine = std::shared_ptr( runtime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter()); if (!mEngine) { return false; } ASSERT(network->getNbInputs() == 1); mInputDims = network->getInput(0)->getDimensions(); ASSERT(mInputDims.nbDims == 4); ASSERT(network->getNbOutputs() == 1); mOutputDims = network->getOutput(0)->getDimensions(); ASSERT(mOutputDims.nbDims == 4); return true; } bool OnnxInference::constructNetwork(SampleUniquePtr& builder, SampleUniquePtr& network, SampleUniquePtr& config, SampleUniquePtr& parser) { auto parsed = parser->parseFromFile(locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast(sample::gLogger.getReportableSeverity())); if (!parsed) { return false; } config->setMaxWorkspaceSize(16_MiB); if (mParams.fp16) { //config->setFlag(BuilderFlag::kFP16); } samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore); return true; } bool OnnxInference::infer(std::vector& data, const Dims4 dims) { // Create RAII buffer manager object samplesCommon::BufferManager buffers(mEngine); auto context = SampleUniquePtr(mEngine->createExecutionContext()); if (!context) { return false; } // Read the input data into the managed buffers assert(mParams.inputTensorNames.size() == 1); if (!processInput(buffers, data)) { return false; } std::vector().swap(data); // Memcpy from host input buffers to device input buffers buffers.copyInputToDevice(); bool status = context->executeV2(buffers.getDeviceBindings().data()); if (!status) { return false; } // Memcpy from device output buffers to host output buffers buffers.copyOutputToHost(); // Verify results const int outputSize = mInputDims.d[2] * mInputDims.d[3] * mInputDims.d[0]; float* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); // return result data = std::vector(output, output + outputSize); return true; } //! //! \brief Reads the input and stores the result in a managed buffer //! bool OnnxInference::processInput(const samplesCommon::BufferManager& buffers, const std::vector data) { const int count = mInputDims.d[2] * mInputDims.d[3] * mInputDims.d[0]; float* hostDataBuffer = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); //std::copy(data.begin(), data.end(), buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int i = 0; i < count; i++) { hostDataBuffer[i] = data[i]; } return true; }