// Builds (or loads from the "./PLAN" cache) a TensorRT engine from a Caffe
// model, runs one synchronous inference on `image`, prints non-negligible
// output activations, and stores output_data[1] into the (file-scope) `prob`.
//
// Returns 0 on success, -1 if the model cannot be parsed/built/deserialized.
//
// NOTE(review): the parameter list "......" is not valid C++ — presumably a
// placeholder in the original source; kept as-is to preserve the interface.
int main (const cv::Mat image, ...... ) {
    const std::string PLAN       = "./PLAN";            // serialized-engine cache file
    const std::string proto_file = "./demo.prototxt";
    const std::string model_file = "./demo.caffemodel";

    std::ifstream cache(PLAN);
    std::stringstream modelStream;
    modelStream.seekg(0, modelStream.beg);

    // ---- caffe -> PLAN: build the engine once, cache its serialized form ----
    if (!cache) {
        nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(ggLogger);
        nvinfer1::INetworkDefinition* network = builder->createNetwork();
        nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();

        const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor =
            parser->parse(proto_file.c_str(), model_file.c_str(), *network,
                          nvinfer1::DataType::kFLOAT);
        if (!blobNameToTensor) {
            // FIX: parse() returns null on failure; original dereferenced it blindly.
            std::cerr << "failed to parse " << proto_file << " / " << model_file << std::endl;
            return -1;
        }

        nvinfer1::ITensor* output = blobNameToTensor->find(OUTPUT_BLOB);
        network->markOutput(*output);

        builder->setMaxBatchSize(1);
        builder->setMaxWorkspaceSize(16 << 20);   // 16 MiB of builder scratch space
        builder->setFp16Mode(true);               // allow FP16 kernels where available
        //builder->setStrictTypeConstraints(true);

        nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
        if (!engine) {
            // FIX: buildCudaEngine() may fail (e.g. unsupported layer) and return null.
            std::cerr << "failed to build CUDA engine" << std::endl;
            return -1;
        }

        nvinfer1::IHostMemory* serializer = engine->serialize();
        modelStream.write((const char*)serializer->data(), serializer->size());

        // Persist the serialized engine so subsequent runs skip the (slow) build.
        std::ofstream output_obj(PLAN);
        output_obj << modelStream.rdbuf();
        output_obj.close();

        serializer->destroy();   // FIX: IHostMemory was leaked (destroy was commented out)
        network->destroy();
        parser->destroy();
        engine->destroy();
        builder->destroy();
        modelStream.seekg(0, modelStream.beg);   // rewind for the deserialize pass below
    } else {
        // Cache hit: slurp the serialized engine straight from disk.
        modelStream << cache.rdbuf();
        cache.close();
    }

    // ---- PLAN -> engine: deserialize into a runnable execution context ----
    modelStream.seekg(0, std::ios::end);
    const int size = modelStream.tellg();        // byte length of serialized engine
    modelStream.seekg(0, std::ios::beg);
    void* mem = malloc(size);
    modelStream.read((char*)mem, size);

    nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(ggLogger);
    nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(mem, size, nullptr);
    free(mem);                                   // engine keeps its own copy
    if (!engine) {
        // FIX: a corrupt/stale PLAN file would otherwise crash on the next call.
        std::cerr << "failed to deserialize engine (try deleting " << PLAN << ")" << std::endl;
        infer->destroy();
        return -1;
    }
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();

    // ---- allocate unified-memory I/O buffers sized from the engine bindings ----
    float* input_data;
    float* output_data;
    nvinfer1::Dims inputDims =
        engine->getBindingDimensions(engine->getBindingIndex(INPUT_BLOB));
    nvinfer1::Dims outputDims =
        engine->getBindingDimensions(engine->getBindingIndex(OUTPUT_BLOB));
    cudaMallocManaged(&input_data,
                      inputDims.d[0] * inputDims.d[1] * inputDims.d[2] * sizeof(float));
    cudaMallocManaged(&output_data,
                      outputDims.d[0] * outputDims.d[1] * outputDims.d[2] * sizeof(float));
    std::cout << INPUT_BLOB << ": (" << inputDims.d[0] << "," << inputDims.d[1]
              << "," << inputDims.d[2] << ")" << std::endl;
    std::cout << OUTPUT_BLOB << ": (" << outputDims.d[0] << "," << outputDims.d[1]
              << "," << outputDims.d[2] << ")" << std::endl;

    // NOTE(review): the input-copy loop was garbled in the original source
    // ("...<(row, col)[channel]"); reconstructed here as a planar CHW copy from
    // the OpenCV image. Assumes inputDims is (C,H,W) and `image` is an 8-bit
    // 3-channel Mat (cv::Vec3b), with no mean/scale normalization — TODO confirm
    // against the model's expected preprocessing.
    for (int channel = 0; channel < inputDims.d[0]; ++channel) {
        for (int row = 0; row < inputDims.d[1]; ++row) {
            for (int col = 0; col < inputDims.d[2]; ++col) {
                input_data[channel * inputDims.d[1] * inputDims.d[2]
                           + row * inputDims.d[2] + col] =
                    image.at<cv::Vec3b>(row, col)[channel];
            }
        }
    }
    cudaDeviceSynchronize();   // make host writes visible to the device

    void* buffers[] = { input_data, output_data };
    context->execute(1, buffers);   // batch size 1, synchronous inference
    cudaDeviceSynchronize();        // make device results visible to the host

    // NOTE(review): this scan was partially garbled in the original
    // ("for( size_t i=0; i 0.000001f )"); reconstructed as a dump of all
    // activations above a tiny threshold.
    size_t ch = outputDims.d[0];
    for (size_t i = 0; i < ch; ++i) {
        if (output_data[i] > 0.000001f)
            std::cout << " index is " << output_data[i] << std::endl;
    }
    prob = output_data[1];   // presumably the class-1 probability — verify against caller

    // Post-processing (prob/d_prob are declared elsewhere in this file).
    prob = post_process(prob, d_prob, 0.4f);

    nvcaffeparser1::shutdownProtobufLibrary();
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);
    context->destroy();
    engine->destroy();
    infer->destroy();   // FIX: IRuntime was leaked in the original
    return 0;
}