TensorRT: segmentation fault when calling buildCudaEngine

Program received signal SIGSEGV, Segmentation fault.
0x0000007fb7494150 in nvinfer1::cudnn::validateConfig(nvinfer1::Network const&,nvinfer1::CudaEngineBuildContext const&)() from /home/ubuntu/code/GIE/GIE_test/lenet/lib/libnvinfer.so

Hello sherrysherry, thank you for reporting this. Please provide additional information regarding your L4T version and software configuration so we can attempt to assist with this issue.

I called the GIE interface to build my own model based on the GIE sample; see the program details below:
#include <assert.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <sys/stat.h>
#include <cmath>
#include <time.h>
#include <cuda_runtime_api.h>

#include "Infer.h"
#include "caffeParser.h"
#include <fstream>
#include <iostream>

using namespace nvinfer1;
using namespace nvcaffeparser1;

#define CHECK(status) \
{ \
	if (status != 0) \
	{ \
		std::cout << "Cuda failure: " << status; \
		abort(); \
	} \
}

// Logger for GIE info/warning/errors
class Logger : public ILogger
{
void log(Severity severity, const char* msg) override
{
if (severity!=Severity::kINFO)
std::cout << msg << std::endl;
}
} gLogger;

const int batchSize=1;
static const int INPUT_H = 28;
static const int INPUT_W = 28;
static const int OUTPUT_SIZE = 10;
///////////////// Changed from the original sample: the "data" input is replaced by "conv1" //////////
const char* INPUT_BLOB_NAME = "conv1";
const char* OUTPUT_BLOB_NAME = "prob";

std::string locateFile(const std::string& input)
{
std::string file = "../data/mnist/" + input;
struct stat info;
int i, MAX_DEPTH = 10;
for (i = 0; i < MAX_DEPTH && stat(file.c_str(), &info); i++)
file = "../" + file;

assert(i != MAX_DEPTH);

return file;

}

// simple PGM (portable greyscale map) reader
void readPGMFile(const std::string& fileName, uint8_t buffer[INPUT_H*INPUT_W])
{
std::ifstream infile(locateFile(fileName), std::ifstream::binary);
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), INPUT_H*INPUT_W);
}
/////// This part differs slightly from the example /////////
float* GetinputData()
{
srand(unsigned(time(nullptr)));
uint8_t fileData[INPUT_H*INPUT_W];
readPGMFile(std::to_string(rand() % 10) + ".pgm", fileData);

// print an ascii representation
std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl;
for (int i = 0; i < INPUT_H*INPUT_W; i++)
	std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n");

// parse the mean file and subtract it from the image
IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto(locateFile("mnist_mean.binaryproto").c_str());
const float *meanData = reinterpret_cast<const float*>(meanBlob->getData());

float data[INPUT_H*INPUT_W];
for (int i = 0; i < INPUT_H*INPUT_W; i++)
	data[i] = float(fileData[i])-meanData[i];
return data;

}

void read_binary(const char *filepath, size_t size, float **data_d) {
std::ifstream data_file (filepath, std::ios::in | std::ios::binary);

if (!data_file)
{
    printf("Error opening file \n");
}
size_t size_b = size * sizeof(float);
float *data_h = new float;
if (!data_file.read((char*) data_h, size_b)) 
{
    printf("Error reading file %s\n",filepath);
    
}
// TODO: check CudaError
cudaMalloc((void**) data_d, size_b);
cudaMemcpy(*data_d, data_h, size_b, cudaMemcpyHostToDevice);
delete [] data_h;

}

void ConstructNet(std::ostream& gieModelStream)
{
IBuilder* builder = createInferBuilder(gLogger);
printf("createInferBuilder ...\n");

INetworkDefinition* network = builder->createNetwork();
    
   /* const char *data_name="data";
    Dims3 data_input_dimensions;
    data_input_dimensions.c=1;
    data_input_dimensions.h=28;
    data_input_dimensions.w=28;
    ITensor* data_input= network->addInput(data_name, DataType::kFLOAT, data_input_dimensions);
   */
    
    const char *conv1_name="conv1";//IN--->(1,1,28,28)
    Dims3 conv1_input_dimensions;
    conv1_input_dimensions.c=1;
    conv1_input_dimensions.h=28;
    conv1_input_dimensions.w=28;
    ITensor* conv1_input= network->addInput(conv1_name, DataType::kFLOAT, conv1_input_dimensions);

    printf("1...\n");	

Dims2 conv1_kernelSize;//(5,5);
conv1_kernelSize.h=5;
    conv1_kernelSize.w=5;

float* conv1_kernel_buffers;
size_t conv1_kernel_inputSize = 1 * 20 * conv1_kernelSize.h * conv1_kernelSize.w * sizeof(float);//??(1,20,5,5)
size_t conv1_kernel_count=conv1_kernel_inputSize/sizeof(float);

CHECK(cudaMalloc((void**)&conv1_kernel_buffers, conv1_kernel_inputSize));
Weights conv1_kernelWeights;	
    read_binary("../data/conv1.bin",conv1_kernel_count,&conv1_kernel_buffers);
    conv1_kernelWeights.type= DataType::kFLOAT; 
conv1_kernelWeights.values=conv1_kernel_buffers;
    conv1_kernelWeights.count=conv1_kernel_count;

float* conv1_bias_buffers;
size_t conv1_bias_inputSize =1 * 20 * 1 * 1* sizeof(float);//??(20,1,1,1)
size_t conv1_bias_count=conv1_bias_inputSize /sizeof(float);
CHECK(cudaMalloc((void**)&conv1_bias_buffers, conv1_bias_inputSize));
    
Weights conv1_biasWeights;
    read_binary("../data/conv1_bias.bin",conv1_bias_count,&conv1_bias_buffers);
    conv1_biasWeights.type= DataType::kFLOAT;
conv1_biasWeights.values=conv1_bias_buffers;
    conv1_biasWeights.count=conv1_bias_count;
    printf("2...\n");
int conv1_nbOutputChannels=1;

//add convolution layer
IConvolutionLayer* conv1= network->addConvolution(conv1_input,conv1_nbOutputChannels,conv1_kernelSize,conv1_kernelWeights,conv1_biasWeights);
    Dims2 conv1_stride;
    conv1_stride.h=1;
    conv1_stride.w=1;
    conv1->setStride(conv1_stride);
    Dims2 conv1_pad;
    conv1_pad.h=0;
    conv1_pad.w=0;
    conv1->setPadding(conv1_pad);
    conv1->setNbGroups(1);
    printf("3...\n");
    
   
    Dims2 pool1_winSize;//(2,2);
pool1_winSize.h=2;
    pool1_winSize.w=2;
    
    
    const char *pool1_name="pool1";//IN--->(1,20,24,24)
    Dims3 pool1_input_dimensions;
    pool1_input_dimensions.c=20;
    pool1_input_dimensions.h=24;
    pool1_input_dimensions.w=24;
    
    
    ITensor* pool1_input= network->addInput(pool1_name, DataType::kFLOAT, pool1_input_dimensions);
    //add pooling layer
    IPoolingLayer* pool1= network->addPooling(pool1_input,PoolingType::kMAX,pool1_winSize);
    Dims2 pool1_stride;
    pool1_stride.h=2;
    pool1_stride.w=2;
    pool1->setStride(pool1_stride);
    Dims2 pool1_pad;
    pool1_pad.h=0;
    pool1_pad.w=0;
    pool1->setPadding(pool1_pad);        
          
    const char *conv2_name="conv2";//IN--->(1,20,12,12)
    Dims3 conv2_input_dimensions;
    conv2_input_dimensions.c=20;
    conv2_input_dimensions.h=12;
    conv2_input_dimensions.w=12;
    ITensor* conv2_input= network->addInput(conv2_name, DataType::kFLOAT, conv2_input_dimensions);
    
    printf("4...\n");	

Dims2 conv2_kernelSize;//(5,5);
conv2_kernelSize.h=5;
    conv2_kernelSize.w=5;

float* conv2_kernel_buffers;
size_t conv2_kernel_inputSize = 20 * 50 * conv2_kernelSize.h * conv2_kernelSize.w * sizeof(float);//??(20,50,5,5)
size_t conv2_kernel_count=conv2_kernel_inputSize/sizeof(float);

CHECK(cudaMalloc((void**)&conv2_kernel_buffers, conv2_kernel_inputSize));
Weights conv2_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);	
    read_binary("/home/ubuntu/code/GIE/GIE_TEST/lenet/data/conv2.bin",conv2_kernel_count,&conv2_kernel_buffers);
    conv2_kernelWeights.type= DataType::kFLOAT; 
conv2_kernelWeights.values=conv2_kernel_buffers;
    conv2_kernelWeights.count=conv2_kernel_count;

float* conv2_bias_buffers;
size_t conv2_bias_inputSize = 1 * 50 * 1 * 1* sizeof(float);//??
size_t conv2_bias_count=conv2_bias_inputSize /sizeof(float);
CHECK(cudaMalloc((void**)&conv2_bias_buffers, conv2_bias_inputSize));
    
Weights conv2_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
    read_binary("/home/ubuntu/code/GIE/GIE_TEST/lenet/data/conv2_bias.bin",conv2_bias_count,&conv2_bias_buffers);
    conv2_biasWeights.type= DataType::kFLOAT;
conv2_biasWeights.values=conv2_bias_buffers;
    conv2_biasWeights.count=conv2_bias_count;
    printf("5...\n");
int conv2_nbOutputChannels=1;

//add convolution layer
IConvolutionLayer* conv2= network->addConvolution(conv2_input,conv2_nbOutputChannels,conv2_kernelSize,conv2_kernelWeights,conv2_biasWeights);
    Dims2 conv2_stride;
    conv2_stride.h=1;
    conv2_stride.w=1;
    conv2->setStride(conv2_stride);
    Dims2 conv2_pad;
    conv2_pad.h=0;
    conv2_pad.w=0;
    conv2->setPadding(conv2_pad);
    conv2->setNbGroups(1);
    printf("6...\n");
   
    Dims2 pool2_winSize;
pool2_winSize.h=2;
    pool2_winSize.w=2;
    
    const char *pool2_name="pool2";//IN--->(1,50,8,8)
    Dims3 pool2_input_dimensions;
    pool2_input_dimensions.c=50;
    pool2_input_dimensions.h=8;
    pool2_input_dimensions.w=8;
    
    ITensor* pool2_input= network->addInput(pool2_name, DataType::kFLOAT, pool2_input_dimensions);
    //add pooling layer
    IPoolingLayer* pool2= network->addPooling(pool2_input,PoolingType::kMAX,pool2_winSize);
    Dims2 pool2_stride;
    pool2_stride.h=2;
    pool2_stride.w=2;
    pool2->setStride(pool2_stride);
    Dims2 pool2_pad;
    pool2_pad.h=0;
    pool2_pad.w=0;
    pool2->setPadding(pool2_pad);


    const char *ip1_name="ip1";//IN--->(1,50,4,4)
    Dims3 ip1_input_dimensions;
    ip1_input_dimensions.c=50;
    ip1_input_dimensions.h=4;
    ip1_input_dimensions.w=4;
    ITensor* ip1_input= network->addInput(ip1_name, DataType::kFLOAT, ip1_input_dimensions);
    
    printf("7...\n");	

Dims2 ip1_kernelSize;
ip1_kernelSize.h=4;
    ip1_kernelSize.w=4;

float* ip1_kernel_buffers;
size_t ip1_kernel_inputSize = 500 * 50 * ip1_kernelSize.h * ip1_kernelSize.w * sizeof(float);//?? (500,50,4,4)
size_t ip1_kernel_count=ip1_kernel_inputSize/sizeof(float);

CHECK(cudaMalloc((void**)&ip1_kernel_buffers, ip1_kernel_inputSize));
Weights ip1_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);	
    read_binary("../data/ip1.bin",ip1_kernel_count,&ip1_kernel_buffers);
    ip1_kernelWeights.type= DataType::kFLOAT; 
ip1_kernelWeights.values=ip1_kernel_buffers;
    ip1_kernelWeights.count=ip1_kernel_count;

float* ip1_bias_buffers;
size_t ip1_bias_inputSize = 500 * 1 * 1 * 1* sizeof(float);//??
size_t ip1_bias_count=ip1_bias_inputSize /sizeof(float);
CHECK(cudaMalloc((void**)&ip1_bias_buffers, ip1_bias_inputSize));
    
Weights ip1_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
    read_binary("../data/ip1_bias.bin",ip1_bias_count,&ip1_bias_buffers);
    ip1_biasWeights.type= DataType::kFLOAT;
ip1_biasWeights.values=ip1_bias_buffers;
    ip1_biasWeights.count=ip1_bias_count;
    printf("8...\n");
int ip1_nbOutputChannels=500;
    
//add convolution layer
IFullyConnectedLayer* ip1= network->addFullyConnected(ip1_input,ip1_nbOutputChannels,ip1_kernelWeights,ip1_biasWeights);
    printf("9...\n");
    
    const char *relu1_name="relu1";//IN--->(1,500,1,1)
    Dims3 relu1_input_dimensions;
    relu1_input_dimensions.c=500;
    relu1_input_dimensions.h=1;
    relu1_input_dimensions.w=1;
    ITensor* relu1_input= network->addInput(relu1_name, DataType::kFLOAT, relu1_input_dimensions);

    IActivationLayer* relu1= network->addActivation(relu1_input,ActivationType::kRELU);
    printf("10...\n");
    
    const char *ip2_name="ip2";//IN--->(1,500,1,1)
    Dims3 ip2_input_dimensions;
    ip2_input_dimensions.c=500;
    ip2_input_dimensions.h=1;
    ip2_input_dimensions.w=1;
    ITensor* ip2_input= network->addInput(ip2_name, DataType::kFLOAT, ip2_input_dimensions);
    
    printf("11...\n");	

Dims2 ip2_kernelSize;
ip2_kernelSize.h=1;
    ip2_kernelSize.w=1;

float* ip2_kernel_buffers;
size_t ip2_kernel_inputSize = 1 * 500 * ip2_kernelSize.h * ip2_kernelSize.w * sizeof(float);//??(20,500,1,1)
size_t ip2_kernel_count=ip2_kernel_inputSize/sizeof(float);

CHECK(cudaMalloc((void**)&ip2_kernel_buffers, ip2_kernel_inputSize));
Weights ip2_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);	
    read_binary("../data/ip2.bin",ip2_kernel_count,&ip2_kernel_buffers);
    ip2_kernelWeights.type= DataType::kFLOAT; 
ip2_kernelWeights.values=ip2_kernel_buffers;
    ip2_kernelWeights.count=ip2_kernel_count;

float* ip2_bias_buffers;
size_t ip2_bias_inputSize = 1 * 10 * 1 * 1* sizeof(float);//??
size_t ip2_bias_count=ip2_bias_inputSize /sizeof(float);
CHECK(cudaMalloc((void**)&ip2_bias_buffers, ip2_bias_inputSize));
    
Weights ip2_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
    read_binary("../data/ip2_bias.bin",ip2_bias_count,&ip2_bias_buffers);
    ip2_biasWeights.type= DataType::kFLOAT;
ip2_biasWeights.values=ip2_bias_buffers;
    ip2_biasWeights.count=ip2_bias_count;
    
    int ip2_nbOutputChannels=10;

//add convolution layer
IFullyConnectedLayer* ip2= network->addFullyConnected(ip2_input,ip2_nbOutputChannels,ip2_kernelWeights,ip2_biasWeights);
    
    const char *prob_name="prob";//IN--->(10,1,1,1)
    Dims3 prob_input_dimensions;
    prob_input_dimensions.c=10;
    prob_input_dimensions.h=1;
    prob_input_dimensions.w=1;
    ITensor* prob_input= network->addInput(prob_name, DataType::kFLOAT, prob_input_dimensions);

    ISoftMaxLayer* prob= network->addSoftMax(prob_input);
    
    network->markOutput(*conv1_input);
	
// Build the engine
builder->setMaxBatchSize(10);
builder->setMaxWorkspaceSize(1 << 20);

    builder->setHalf2Mode(false);
    printf("13...\n");


ICudaEngine* engine = builder->buildCudaEngine(*network);
//assert(engine);
    printf("14...\n");
// we don't need the network any more, and we can destroy the parser
network->destroy();

// serialize the engine, then close everything down
engine->serialize(gieModelStream);
engine->destroy();
builder->destroy();
    printf("finish ...\n");

}

void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
const ICudaEngine& engine = context.getEngine();
// input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
// of these, but in this case we know that there is exactly one input and one output.
printf("engine.getNbBindings() :%d\n",engine.getNbBindings() );
assert(engine.getNbBindings() == 2);
void* buffers[2];

// In order to bind the buffers, we need to know the names of the input and output tensors.
// note that indices are guaranteed to be less than IEngine::getNbBindings()
int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME), outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);

// create GPU buffers and a stream
CHECK(cudaMalloc(&buffers[inputIndex], batchSize * INPUT_H * INPUT_W * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float)));

cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// DMA the input to the GPU,  execute the batch asynchronously, and DMA it back:
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE*sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);

// release the stream and the buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));

}

int main()
{
std::stringstream gieModelStream;
///// ConstructNet added here ////////
ConstructNet(gieModelStream);

   // read a random digit file
srand(unsigned(time(nullptr)));
uint8_t fileData[INPUT_H*INPUT_W];
readPGMFile(std::to_string(rand() % 10) + ".pgm", fileData);

// print an ascii representation
std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl;
for (int i = 0; i < INPUT_H*INPUT_W; i++)
	std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n");

// parse the mean file and subtract it from the image
IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto("../data/mnist/mnist_mean.binaryproto");
const float *meanData = reinterpret_cast<const float*>(meanBlob->getData());

float data[INPUT_H*INPUT_W];
for (int i = 0; i < INPUT_H*INPUT_W; i++)
	data[i] = float(fileData[i])-meanData[i];

meanBlob->destroy();

   // deserialize the engine 
gieModelStream.seekg(0, gieModelStream.beg);
    //printf("main 2\n");
IRuntime* runtime = createInferRuntime(gLogger);
    printf("main 3\n");
ICudaEngine* engine = runtime->deserializeCudaEngine(gieModelStream);
    printf("main 4\n");
IExecutionContext *context = engine->createExecutionContext();      

// run inference
float prob[OUTPUT_SIZE];
doInference(*context, data, prob, 1);


context->destroy();
engine->destroy();
runtime->destroy();

}

and the CMakeLists file:

cmake_minimum_required(VERSION 2.8)
project(gielenet)

set(SRC_TEST_FC lenet.cpp)

add_executable(gielenet ${SRC_TEST_FC})
find_package(CUDA REQUIRED)
LIST(APPEND CMAKE_CXX_FLAGS "-std=c++11 -O3 -ffast-math -Wall")
include_directories(
/usr/local/include
/usr/local/cuda-7.0/include/
${CMAKE_SOURCE_DIR}/include
)
link_directories(
/usr/local/lib
/usr/local/cuda/lib64
)
target_link_libraries(gielenet
${CUDA_LIBRARIES}
${CMAKE_SOURCE_DIR}/lib/libnvcaffe_parser.so
${CMAKE_SOURCE_DIR}/lib/libnvinfer.so
${CMAKE_SOURCE_DIR}/lib/libwconv.so
)

Hi sherrysherry,
There is a GIE sample in Multimedia API package. Are you able to run it without problem?

Multimedia API package? Could you give me a link to the Multimedia API package? I want to test the sample, or just give me a sample like my code (reconstructing a network using the GIE API).

Hi sherrysherry,
Please install the package via jetpack and check samples at
/home/ubuntu/tegra_multimedia_api/samples/backend
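
For reference while waiting for that package: the usual pattern with the GIE builder API is to declare a single network input and feed each subsequent layer with the previous layer's output via getOutput(0), instead of calling addInput() once per layer and marking an input tensor as the output. A rough sketch of that pattern (it reuses the Weights objects and variable names from the posted code as placeholders; it is not the exact Multimedia API sample):

        // single input tensor for the whole network
        Dims3 inDims; inDims.c = 1; inDims.h = 28; inDims.w = 28;
        ITensor* data = network->addInput("data", DataType::kFLOAT, inDims);

        Dims2 k5;   k5.h = 5;   k5.w = 5;      // 5x5 convolution kernel
        Dims2 win2; win2.h = 2; win2.w = 2;    // 2x2 pooling window

        // conv1 consumes the network input (note the dereference: the add* methods take an ITensor&)
        IConvolutionLayer* conv1 = network->addConvolution(*data, 20, k5,
                                        conv1_kernelWeights, conv1_biasWeights);

        // pool1 consumes conv1's output tensor instead of a second addInput()
        IPoolingLayer* pool1 = network->addPooling(*conv1->getOutput(0), PoolingType::kMAX, win2);

        // conv2, pool2, ip1, relu1, ip2 and the softmax would be chained the same way,
        // each taking the previous layer's getOutput(0); the tensor marked as the network
        // output should be the last layer's output, not an input tensor:
        network->markOutput(*pool1->getOutput(0));

In the posted ConstructNet, markOutput(*conv1_input) marks an input tensor and the layers stay disconnected from each other, which may be what validateConfig is tripping over.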

Fixed CMakeLists.txt:

cmake_minimum_required(VERSION 2.8)
project(gielenet)

set(SRC_TEST_FC lenet.cpp)

add_executable(gielenet ${SRC_TEST_FC})
find_package(CUDA REQUIRED)
LIST(APPEND CMAKE_CXX_FLAGS "-std=c++11 -O3 -ffast-math -Wall")
include_directories(
/usr/local/include
/usr/local/cuda-8.0/include/
${CMAKE_SOURCE_DIR}/include
)
link_directories(
/usr/local/lib
/usr/local/cuda/lib64
) 
target_link_libraries(gielenet
${CUDA_LIBRARIES}
${CMAKE_SOURCE_DIR}/lib/libnvcaffe_parser.so
${CMAKE_SOURCE_DIR}/lib/libnvinfer.so
${CMAKE_SOURCE_DIR}/lib/libwconv.so
)

And the updated code:

#include <assert.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <sys/stat.h>
#include <cmath>
#include <time.h>

#include <cuda.h>    
#include <cuda_runtime.h>   //CUDA RUNTIME

#include "NvInfer.h"           //Infer.h OLD
#include "NvCaffeParser.h"   // CaffeParser.h   OLD
#include <fstream>
#include <iostream>

using namespace nvinfer1;
using namespace nvcaffeparser1;

#define CHECK(status) \
        { \
                if (status != 0)  \
                { \
                        std::cout << "Cuda failure: " << status;  \
                        abort();  \
                } \
        }

// Logger for GIE info/warning/errors
class Logger : public ILogger
{
void log(Severity severity, const char* msg) override
{
        if (severity!=Severity::kINFO)
                std::cout << msg << std::endl;
}
} gLogger;




const int batchSize=1;
static const int INPUT_H = 28;
static const int INPUT_W = 28;
static const int OUTPUT_SIZE = 10;
///////////////// Changed from the original sample: the "data" input is replaced by "conv1" //////////
const char* INPUT_BLOB_NAME = "conv1";
const char* OUTPUT_BLOB_NAME = "prob";

std::string locateFile(const std::string& input)
{
        std::string file = "../data/mnist/" + input;
        struct stat info;
        int i, MAX_DEPTH = 10;
        for (i = 0; i < MAX_DEPTH && stat(file.c_str(), &info); i++)
                file = "../" + file;

        assert(i != MAX_DEPTH);

        return file;
}

// simple PGM (portable greyscale map) reader
void readPGMFile(const std::string& fileName, uint8_t buffer[INPUT_H*INPUT_W])
{
        std::ifstream infile(locateFile(fileName), std::ifstream::binary);
        std::string magic, h, w, max;
        infile >> magic >> h >> w >> max;
        infile.seekg(1, infile.cur);
        infile.read(reinterpret_cast<char*>(buffer), INPUT_H*INPUT_W);
}
/////// This part differs slightly from the example /////////
float* GetinputData()
{
        srand(unsigned(time(nullptr)));
        uint8_t fileData[INPUT_H*INPUT_W];
        readPGMFile(std::to_string(rand() % 10) + ".pgm", fileData);

// print an ascii representation
        std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl;
        for (int i = 0; i < INPUT_H*INPUT_W; i++)
                std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n");

// parse the mean file and subtract it from the image
        IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto(locateFile("mnist_mean.binaryproto").c_str());
        const float *meanData = reinterpret_cast<const float*>(meanBlob->getData());

        float data[INPUT_H*INPUT_W];
        for (int i = 0; i < INPUT_H*INPUT_W; i++)
                data[i] = float(fileData[i])-meanData[i];
        return data;
}

void read_binary(const char *filepath, size_t size, float **data_d) {
        std::ifstream data_file (filepath, std::ios::in | std::ios::binary);

        if (!data_file)
        {
                printf("Error opening file \n");
        }
        size_t size_b = size * sizeof(float);
        float *data_h = new float;
        if (!data_file.read((char*) data_h, size_b))
        {
                printf("Error reading file %s\n",filepath);

        }
// TODO: check CudaError
        cudaMalloc((void**) data_d, size_b);
        cudaMemcpy(*data_d, data_h, size_b, cudaMemcpyHostToDevice);
        delete [] data_h;
}

void ConstructNet(std::ostream& gieModelStream)
{
        IBuilder* builder = createInferBuilder(gLogger);
        printf("createInferBuilder ...\n");

        INetworkDefinition* network = builder->createNetwork();
        nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
/* const char *data_name="data";
   Dims3 data_input_dimensions;
   data_input_dimensions.c=1;
   data_input_dimensions.h=28;
   data_input_dimensions.w=28;
   ITensor* data_input= network->addInput(data_name, DataType::kFLOAT, data_input_dimensions);
 */

        const char *conv1_name="conv1";//IN--->(1,1,28,28)
        Dims3 conv1_input_dimensions;
        conv1_input_dimensions.c=1;
        conv1_input_dimensions.h=28;
        conv1_input_dimensions.w=28;
        ITensor* conv1_input= network->addInput(conv1_name, DataType::kFLOAT, conv1_input_dimensions);

        printf("1...\n");

        Dims2 conv1_kernelSize;//(5,5);
        conv1_kernelSize.h=5;
        conv1_kernelSize.w=5;

        float* conv1_kernel_buffers;
        size_t conv1_kernel_inputSize = 1 * 20 * conv1_kernelSize.h * conv1_kernelSize.w * sizeof(float);//??(1,20,5,5)
        size_t conv1_kernel_count=conv1_kernel_inputSize/sizeof(float);

        CHECK(cudaMalloc((void**)&conv1_kernel_buffers, conv1_kernel_inputSize));
        Weights conv1_kernelWeights;
        read_binary("../data/conv1.bin",conv1_kernel_count,&conv1_kernel_buffers);
        conv1_kernelWeights.type= DataType::kFLOAT;
        conv1_kernelWeights.values=conv1_kernel_buffers;
        conv1_kernelWeights.count=conv1_kernel_count;

        float* conv1_bias_buffers;
        size_t conv1_bias_inputSize =1 * 20 * 1 * 1* sizeof(float);//??(20,1,1,1)
        size_t conv1_bias_count=conv1_bias_inputSize /sizeof(float);
        CHECK(cudaMalloc((void**)&conv1_bias_buffers, conv1_bias_inputSize));

        Weights conv1_biasWeights;
        read_binary("../data/conv1_bias.bin",conv1_bias_count,&conv1_bias_buffers);
        conv1_biasWeights.type= DataType::kFLOAT;
        conv1_biasWeights.values=conv1_bias_buffers;
        conv1_biasWeights.count=conv1_bias_count;
        printf("2...\n");
        int conv1_nbOutputChannels=1;

//add convolution layer
        IConvolutionLayer* conv1= network->addConvolution(conv1_input,conv1_nbOutputChannels,conv1_kernelSize,conv1_kernelWeights,conv1_biasWeights);
        Dims2 conv1_stride;
        conv1_stride.h=1;
        conv1_stride.w=1;
        conv1->setStride(conv1_stride);
        Dims2 conv1_pad;
        conv1_pad.h=0;
        conv1_pad.w=0;
        conv1->setPadding(conv1_pad);
        conv1->setNbGroups(1);
        printf("3...\n");


        Dims2 pool1_winSize;//(2,2);
        pool1_winSize.h=2;
        pool1_winSize.w=2;


        const char *pool1_name="pool1";//IN--->(1,20,24,24)
        Dims3 pool1_input_dimensions;
        pool1_input_dimensions.c=20;
        pool1_input_dimensions.h=24;
        pool1_input_dimensions.w=24;


        ITensor* pool1_input= network->addInput(pool1_name, DataType::kFLOAT, pool1_input_dimensions);
//add pooling layer
        IPoolingLayer* pool1= network->addPooling(pool1_input,PoolingType::kMAX,pool1_winSize);
        Dims2 pool1_stride;
        pool1_stride.h=2;
        pool1_stride.w=2;
        pool1->setStride(pool1_stride);
        Dims2 pool1_pad;
        pool1_pad.h=0;
        pool1_pad.w=0;
        pool1->setPadding(pool1_pad);

        const char *conv2_name="conv2";//IN--->(1,20,12,12)
        Dims3 conv2_input_dimensions;
        conv2_input_dimensions.c=20;
        conv2_input_dimensions.h=12;
        conv2_input_dimensions.w=12;
        ITensor* conv2_input= network->addInput(conv2_name, DataType::kFLOAT, conv2_input_dimensions);

        printf("4...\n");

        Dims2 conv2_kernelSize;//(5,5);
        conv2_kernelSize.h=5;
        conv2_kernelSize.w=5;

        float* conv2_kernel_buffers;
        size_t conv2_kernel_inputSize = 20 * 50 * conv2_kernelSize.h * conv2_kernelSize.w * sizeof(float);//??(20,50,5,5)
        size_t conv2_kernel_count=conv2_kernel_inputSize/sizeof(float);

        CHECK(cudaMalloc((void**)&conv2_kernel_buffers, conv2_kernel_inputSize));
        Weights conv2_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);
        read_binary("/home/ubuntu/code/GIE/GIE_TEST/lenet/data/conv2.bin",conv2_kernel_count,&conv2_kernel_buffers);
        conv2_kernelWeights.type= DataType::kFLOAT;
        conv2_kernelWeights.values=conv2_kernel_buffers;
        conv2_kernelWeights.count=conv2_kernel_count;

        float* conv2_bias_buffers;
        size_t conv2_bias_inputSize = 1 * 50 * 1 * 1* sizeof(float);//??
        size_t conv2_bias_count=conv2_bias_inputSize /sizeof(float);
        CHECK(cudaMalloc((void**)&conv2_bias_buffers, conv2_bias_inputSize));

        Weights conv2_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
        read_binary("/home/ubuntu/code/GIE/GIE_TEST/lenet/data/conv2_bias.bin",conv2_bias_count,&conv2_bias_buffers);
        conv2_biasWeights.type= DataType::kFLOAT;
        conv2_biasWeights.values=conv2_bias_buffers;
        conv2_biasWeights.count=conv2_bias_count;
        printf("5...\n");
        int conv2_nbOutputChannels=1;

//add convolution layer
        IConvolutionLayer* conv2= network->addConvolution(conv2_input,conv2_nbOutputChannels,conv2_kernelSize,conv2_kernelWeights,conv2_biasWeights);
        Dims2 conv2_stride;
        conv2_stride.h=1;
        conv2_stride.w=1;
        conv2->setStride(conv2_stride);
        Dims2 conv2_pad;
        conv2_pad.h=0;
        conv2_pad.w=0;
        conv2->setPadding(conv2_pad);
        conv2->setNbGroups(1);
        printf("6...\n");

        Dims2 pool2_winSize;
        pool2_winSize.h=2;
        pool2_winSize.w=2;

        const char *pool2_name="pool2";//IN--->(1,50,8,8)
        Dims3 pool2_input_dimensions;
        pool2_input_dimensions.c=50;
        pool2_input_dimensions.h=8;
        pool2_input_dimensions.w=8;

        ITensor* pool2_input= network->addInput(pool2_name, DataType::kFLOAT, pool2_input_dimensions);
//add pooling layer
        IPoolingLayer* pool2= network->addPooling(pool2_input,PoolingType::kMAX,pool2_winSize);
        Dims2 pool2_stride;
        pool2_stride.h=2;
        pool2_stride.w=2;
        pool2->setStride(pool2_stride);
        Dims2 pool2_pad;
        pool2_pad.h=0;
        pool2_pad.w=0;
        pool2->setPadding(pool2_pad);


        const char *ip1_name="ip1";//IN--->(1,50,4,4)
        Dims3 ip1_input_dimensions;
        ip1_input_dimensions.c=50;
        ip1_input_dimensions.h=4;
        ip1_input_dimensions.w=4;
        ITensor* ip1_input= network->addInput(ip1_name, DataType::kFLOAT, ip1_input_dimensions);

        printf("7...\n");

        Dims2 ip1_kernelSize;
        ip1_kernelSize.h=4;
        ip1_kernelSize.w=4;

        float* ip1_kernel_buffers;
        size_t ip1_kernel_inputSize = 500 * 50 * ip1_kernelSize.h * ip1_kernelSize.w * sizeof(float);//?? (500,50,4,4)
        size_t ip1_kernel_count=ip1_kernel_inputSize/sizeof(float);

        CHECK(cudaMalloc((void**)&ip1_kernel_buffers, ip1_kernel_inputSize));
        Weights ip1_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);
        read_binary("../data/ip1.bin",ip1_kernel_count,&ip1_kernel_buffers);
        ip1_kernelWeights.type= DataType::kFLOAT;
        ip1_kernelWeights.values=ip1_kernel_buffers;
        ip1_kernelWeights.count=ip1_kernel_count;

        float* ip1_bias_buffers;
        size_t ip1_bias_inputSize = 500 * 1 * 1 * 1* sizeof(float);//??
        size_t ip1_bias_count=ip1_bias_inputSize /sizeof(float);
        CHECK(cudaMalloc((void**)&ip1_bias_buffers, ip1_bias_inputSize));

        Weights ip1_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
        read_binary("../data/ip1_bias.bin",ip1_bias_count,&ip1_bias_buffers);
        ip1_biasWeights.type= DataType::kFLOAT;
        ip1_biasWeights.values=ip1_bias_buffers;
        ip1_biasWeights.count=ip1_bias_count;
        printf("8...\n");
        int ip1_nbOutputChannels=500;

//add convolution layer
        IFullyConnectedLayer* ip1= network->addFullyConnected(ip1_input,ip1_nbOutputChannels,ip1_kernelWeights,ip1_biasWeights);
        printf("9...\n");

        const char *relu1_name="relu1";//IN--->(1,500,1,1)
        Dims3 relu1_input_dimensions;
        relu1_input_dimensions.c=500;
        relu1_input_dimensions.h=1;
        relu1_input_dimensions.w=1;
        ITensor* relu1_input= network->addInput(relu1_name, DataType::kFLOAT, relu1_input_dimensions);

        IActivationLayer* relu1= network->addActivation(relu1_input,ActivationType::kRELU);
        printf("10...\n");

        const char *ip2_name="ip2";//IN--->(1,500,1,1)
        Dims3 ip2_input_dimensions;
        ip2_input_dimensions.c=500;
        ip2_input_dimensions.h=1;
        ip2_input_dimensions.w=1;
        ITensor* ip2_input= network->addInput(ip2_name, DataType::kFLOAT, ip2_input_dimensions);

        printf("11...\n");

        Dims2 ip2_kernelSize;
        ip2_kernelSize.h=1;
        ip2_kernelSize.w=1;

        float* ip2_kernel_buffers;
        size_t ip2_kernel_inputSize = 1 * 500 * ip2_kernelSize.h * ip2_kernelSize.w * sizeof(float);//??(20,500,1,1)
        size_t ip2_kernel_count=ip2_kernel_inputSize/sizeof(float);

        CHECK(cudaMalloc((void**)&ip2_kernel_buffers, ip2_kernel_inputSize));
        Weights ip2_kernelWeights;//(conv1_kernel_buffers,conv1_kernel_count);
        read_binary("../data/ip2.bin",ip2_kernel_count,&ip2_kernel_buffers);
        ip2_kernelWeights.type= DataType::kFLOAT;
        ip2_kernelWeights.values=ip2_kernel_buffers;
        ip2_kernelWeights.count=ip2_kernel_count;

        float* ip2_bias_buffers;
        size_t ip2_bias_inputSize = 1 * 10 * 1 * 1* sizeof(float);//??
        size_t ip2_bias_count=ip2_bias_inputSize /sizeof(float);
        CHECK(cudaMalloc((void**)&ip2_bias_buffers, ip2_bias_inputSize));

        Weights ip2_biasWeights;//(conv1_bias_buffers,conv1_bias_count);
        read_binary("../data/ip2_bias.bin",ip2_bias_count,&ip2_bias_buffers);
        ip2_biasWeights.type= DataType::kFLOAT;
        ip2_biasWeights.values=ip2_bias_buffers;
        ip2_biasWeights.count=ip2_bias_count;

        int ip2_nbOutputChannels=10;

//add convolution layer
        IFullyConnectedLayer* ip2= network->addFullyConnected(ip2_input,ip2_nbOutputChannels,ip2_kernelWeights,ip2_biasWeights);

        const char *prob_name="prob";//IN--->(10,1,1,1)
        Dims3 prob_input_dimensions;
        prob_input_dimensions.c=10;
        prob_input_dimensions.h=1;
        prob_input_dimensions.w=1;
        ITensor* prob_input= network->addInput(prob_name, DataType::kFLOAT, prob_input_dimensions);

        ISoftMaxLayer* prob= network->addSoftMax(prob_input);

        network->markOutput(*conv1_input);

// Build the engine
        builder->setMaxBatchSize(10);
        builder->setMaxWorkspaceSize(1 << 20);

        builder->setHalf2Mode(false);
        printf("13...\n");


        ICudaEngine* engine = builder->buildCudaEngine(*network);
//assert(engine);
        printf("14...\n");
// we don't need the network any more, and we can destroy the parser
        network->destroy();

// serialize the engine, then close everything down
        engine->serialize(gieModelStream);
        engine->destroy();
        builder->destroy();
        printf("finish ...\n");

}

void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
        const ICudaEngine& engine = context.getEngine();
// input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
// of these, but in this case we know that there is exactly one input and one output.
        printf("engine.getNbBindings() :%d\n",engine.getNbBindings() );
        assert(engine.getNbBindings() == 2);
        void* buffers[2];

// In order to bind the buffers, we need to know the names of the input and output tensors.
// note that indices are guaranteed to be less than IEngine::getNbBindings()
        int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME), outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);

// create GPU buffers and a stream
        CHECK(cudaMalloc(&buffers[inputIndex], batchSize * INPUT_H * INPUT_W * sizeof(float)));
        CHECK(cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float)));

        cudaStream_t stream;
        CHECK(cudaStreamCreate(&stream));

// DMA the input to the GPU, execute the batch asynchronously, and DMA it back:
        CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
        context.enqueue(batchSize, buffers, stream, nullptr);
        CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE*sizeof(float), cudaMemcpyDeviceToHost, stream));
        cudaStreamSynchronize(stream);

// release the stream and the buffers
        cudaStreamDestroy(stream);
        CHECK(cudaFree(buffers[inputIndex]));
        CHECK(cudaFree(buffers[outputIndex]));
}

int main()
{
        std::stringstream gieModelStream;
///// ConstructNet added here ////////
        ConstructNet(gieModelStream);

// read a random digit file
        srand(unsigned(time(nullptr)));
        uint8_t fileData[INPUT_H*INPUT_W];
        readPGMFile(std::to_string(rand() % 10) + ".pgm", fileData);

// print an ascii representation
        std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl;
        for (int i = 0; i < INPUT_H*INPUT_W; i++)
                std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n");

// parse the mean file and subtract it from the image
        IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto("../data/mnist/mnist_mean.binaryproto");
        const float *meanData = reinterpret_cast<const float*>(meanBlob->getData());

        float data[INPUT_H*INPUT_W];
        for (int i = 0; i < INPUT_H*INPUT_W; i++)
                data[i] = float(fileData[i])-meanData[i];

        meanBlob->destroy();

// deserialize the engine
        gieModelStream.seekg(0, gieModelStream.beg);
//printf("main 2\n");
        IRuntime* runtime = createInferRuntime(gLogger);
        printf("main 3\n");
        ICudaEngine* engine = runtime->deserializeCudaEngine(gieModelStream);
        printf("main 4\n");
        IExecutionContext *context = engine->createExecutionContext();

// run inference
        float prob[OUTPUT_SIZE];
        doInference(*context, data, prob, 1);


        context->destroy();
        engine->destroy();
        runtime->destroy();
}
ubuntu@TX1_16:/Drive/$ make
-- Configuring done
-- Generating done
-- Build files have been written to: /Drive/
Scanning dependencies of target gielenet
[ 50%] Building CXX object CMakeFiles/gielenet.dir/lenet.cpp.o
/Drive//lenet.cpp: In function 'float* GetinputData()':
/Drive//lenet.cpp:88:38: error: 'CaffeParser' has not been declared
         IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto(locateFile("mnist_mean.binaryproto").c_str());
                                      ^
/Drive//lenet.cpp:91:15: warning: address of local variable 'data' returned [-Wreturn-local-addr]
         float data[INPUT_H*INPUT_W];
               ^
/Drive//lenet.cpp: In function 'void ConstructNet(std::ostream&)':
/Drive//lenet.cpp:170:148: error: no matching function for call to 'nvinfer1::INetworkDefinition::addConvolution(nvinfer1::ITensor*&, int&, nvinfer1::Dims2&, nvinfer1::Weights&, nvinfer1::Weights&)'
 v1= network->addConvolution(conv1_input,conv1_nbOutputChannels,conv1_kernelSize,conv1_kernelWeights,conv1_biasWeights);
                                                                                                                      ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1051:31: note: candidate: virtual nvinfer1::IConvolutionLayer* nvinfer1::INetworkDefinition::addConvolution(nvinfer1::ITensor&, int, nvinfer1::Dims2, nvinfer1::Weights, nvinfer1::Weights)
  virtual IConvolutionLayer*   addConvolution(ITensor& input, int nbOutputMaps, Dims2 kernelSize, Weights kernelWeights,
                               ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1051:31: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:197:94: error: no matching function for call to 'nvinfer1::INetworkDefinition::addPooling(nvinfer1::ITensor*&, nvinfer1::PoolingType, nvinfer1::Dims2&)'
         IPoolingLayer* pool1= network->addPooling(pool1_input,PoolingType::kMAX,pool1_winSize);
                                                                                              ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1093:28: note: candidate: virtual nvinfer1::IPoolingLayer* nvinfer1::INetworkDefinition::addPooling(nvinfer1::ITensor&, nvinfer1::PoolingType, nvinfer1::Dims2)
  virtual IPoolingLayer*    addPooling(ITensor& input, PoolingType type, Dims2 windowSize) = 0;
                            ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1093:28: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:245:148: error: no matching function for call to 'nvinfer1::INetworkDefinition::addConvolution(nvinfer1::ITensor*&, int&, nvinfer1::Dims2&, nvinfer1::Weights&, nvinfer1::Weights&)'
 v2= network->addConvolution(conv2_input,conv2_nbOutputChannels,conv2_kernelSize,conv2_kernelWeights,conv2_biasWeights);
                                                                                                                      ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1051:31: note: candidate: virtual nvinfer1::IConvolutionLayer* nvinfer1::INetworkDefinition::addConvolution(nvinfer1::ITensor&, int, nvinfer1::Dims2, nvinfer1::Weights, nvinfer1::Weights)
  virtual IConvolutionLayer*   addConvolution(ITensor& input, int nbOutputMaps, Dims2 kernelSize, Weights kernelWeights,
                               ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1051:31: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:269:94: error: no matching function for call to 'nvinfer1::INetworkDefinition::addPooling(nvinfer1::ITensor*&, nvinfer1::PoolingType, nvinfer1::Dims2&)'
         IPoolingLayer* pool2= network->addPooling(pool2_input,PoolingType::kMAX,pool2_winSize);
                                                                                              ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1093:28: note: candidate: virtual nvinfer1::IPoolingLayer* nvinfer1::INetworkDefinition::addPooling(nvinfer1::ITensor&, nvinfer1::PoolingType, nvinfer1::Dims2)
  virtual IPoolingLayer*    addPooling(ITensor& input, PoolingType type, Dims2 windowSize) = 0;
                            ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1093:28: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:318:127: error: no matching function for call to 'nvinfer1::INetworkDefinition::addFullyConnected(nvinfer1::ITensor*&, int&, nvinfer1::Weights&, nvinfer1::Weights&)'
 FullyConnectedLayer* ip1= network->addFullyConnected(ip1_input,ip1_nbOutputChannels,ip1_kernelWeights,ip1_biasWeights);
                                                                                                                      ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1068:33: note: candidate: virtual nvinfer1::IFullyConnectedLayer* nvinfer1::INetworkDefinition::addFullyConnected(nvinfer1::ITensor&, int, nvinfer1::Weights, nvinfer1::Weights)
  virtual IFullyConnectedLayer*  addFullyConnected(ITensor& input, int nbOutputs, Weights kernelWeights, Weights biasWei
                                 ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1068:33: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:328:90: error: no matching function for call to 'nvinfer1::INetworkDefinition::addActivation(nvinfer1::ITensor*&, nvinfer1::ActivationType)'
         IActivationLayer* relu1= network->addActivation(relu1_input,ActivationType::kRELU);
                                                                                          ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1080:30: note: candidate: virtual nvinfer1::IActivationLayer* nvinfer1::INetworkDefinition::addActivation(nvinfer1::ITensor&, nvinfer1::ActivationType)
  virtual IActivationLayer*   addActivation(ITensor& input, ActivationType type) = 0;
                              ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1080:30: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:369:127: error: no matching function for call to 'nvinfer1::INetworkDefinition::addFullyConnected(nvinfer1::ITensor*&, int&, nvinfer1::Weights&, nvinfer1::Weights&)'
 FullyConnectedLayer* ip2= network->addFullyConnected(ip2_input,ip2_nbOutputChannels,ip2_kernelWeights,ip2_biasWeights);
                                                                                                                      ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1068:33: note: candidate: virtual nvinfer1::IFullyConnectedLayer* nvinfer1::INetworkDefinition::addFullyConnected(nvinfer1::ITensor&, int, nvinfer1::Weights, nvinfer1::Weights)
  virtual IFullyConnectedLayer*  addFullyConnected(ITensor& input, int nbOutputs, Weights kernelWeights, Weights biasWei
                                 ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1068:33: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:378:60: error: no matching function for call to 'nvinfer1::INetworkDefinition::addSoftMax(nvinfer1::ITensor*&)'
         ISoftMaxLayer* prob= network->addSoftMax(prob_input);
                                                            ^
In file included from /Drive//lenet.cpp:15:0:
/usr/include/aarch64-linux-gnu/NvInfer.h:1133:28: note: candidate: virtual nvinfer1::ISoftMaxLayer* nvinfer1::INetworkDefinition::addSoftMax(nvinfer1::ITensor&)
  virtual ISoftMaxLayer*    addSoftMax(ITensor& input) = 0;
                            ^
/usr/include/aarch64-linux-gnu/NvInfer.h:1133:28: note:   no known conversion for argument 1 from 'nvinfer1::ITensor*' to 'nvinfer1::ITensor&'
/Drive//lenet.cpp:123:39: warning: unused variable 'parser' [-Wunused-variable]
         nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
                                       ^
/Drive//lenet.cpp:318:31: warning: unused variable 'ip1' [-Wunused-variable]
         IFullyConnectedLayer* ip1= network->addFullyConnected(ip1_input,ip1_nbOutputChannels,ip1_kernelWeights,ip1_bias
                               ^
/Drive//lenet.cpp:328:27: warning: unused variable 'relu1' [-Wunused-variable]
         IActivationLayer* relu1= network->addActivation(relu1_input,ActivationType::kRELU);
                           ^
/Drive//lenet.cpp:369:31: warning: unused variable 'ip2' [-Wunused-variable]
         IFullyConnectedLayer* ip2= network->addFullyConnected(ip2_input,ip2_nbOutputChannels,ip2_kernelWeights,ip2_bias
                               ^
/Drive//lenet.cpp:378:24: warning: unused variable 'prob' [-Wunused-variable]
         ISoftMaxLayer* prob= network->addSoftMax(prob_input);
                        ^
/Drive//lenet.cpp: In function 'int main()':
/Drive//lenet.cpp:453:38: error: 'CaffeParser' has not been declared
         IBinaryProtoBlob* meanBlob = CaffeParser::parseBinaryProto("../data/mnist/mnist_mean.binaryproto");
                                      ^
CMakeFiles/gielenet.dir/build.make:62: recipe for target 'CMakeFiles/gielenet.dir/lenet.cpp.o' failed
make[2]: *** [CMakeFiles/gielenet.dir/lenet.cpp.o] Error 1
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/gielenet.dir/all' failed
make[1]: *** [CMakeFiles/gielenet.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2
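
Most of these errors report the same mismatch: the add* methods declared in NvInfer.h take an ITensor&, while the code passes the ITensor* returned by addInput(), so each call needs a dereference. A minimal illustration, reusing the names from the posted code (only the first argument changes):

        // dereference the ITensor* so it matches the ITensor& parameter
        IConvolutionLayer* conv1 = network->addConvolution(*conv1_input, conv1_nbOutputChannels,
                                        conv1_kernelSize, conv1_kernelWeights, conv1_biasWeights);
        IPoolingLayer* pool1 = network->addPooling(*pool1_input, PoolingType::kMAX, pool1_winSize);

For the "'CaffeParser' has not been declared" errors: with the NvCaffeParser.h header, the parser is presumably reached through the nvcaffeparser1::ICaffeParser instance (the currently unused parser variable in ConstructNet), e.g. parser->parseBinaryProto(...), rather than through the old CaffeParser class name.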