Hello
I have this inference code
void FaceNetClassifier::doInference(float* inputData, float* output) {
int size_of_single_input = 3 * 160 * 160 * sizeof(float);
int size_of_single_output = 128 * sizeof(float);
int inputIndex = m_engine->getBindingIndex("input");
int outputIndex = m_engine->getBindingIndex("embeddings");
void* buffers[2];
cudaMalloc(&buffers[inputIndex], m_batchSize * size_of_single_input);
cudaMalloc(&buffers[outputIndex], m_batchSize * size_of_single_output);
if (buffers[outputIndex] == nullptr)
{
std::cout << "Out of memory" << std::endl;
//exit(1);
}else{
std::cout << "Memory allocated" << std::endl;
}
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// copy data to GPU and execute
cout<<"fails here"<<endl;
CHECK(cudaMemcpyAsync(buffers[inputIndex], inputData, m_batchSize * size_of_single_input, cudaMemcpyHostToDevice, stream));
cout<<"coped data to GPU"<<endl;
m_context->enqueue(m_batchSize, &buffers[0], stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], m_batchSize * size_of_single_output, cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release the stream and the buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}
and below is the application output in Qt
facenet engine created/loaded:
Parsing Directory: ../imgs
closed images Directory:
before facenet:
before inference
Memory allocated
fails here
The program has unexpectedly finished.
The process was ended forcefully.
/home/obed/Documents/qtprojects/build-SmartGate_QtApp-Desktop-Debug/SmartGate_QtApp crashed.
The code seems to fail at the cudaMemcopyAsyc function but i cannot tell why because it does not even raise a cuda error. What could i be doing wrong?
I am using TensorRT 6.0.1, CUDA 10.0,
The engine was successfully generated but execution terminates at inference.
Please help, i am a beginner.