Hello everyone,
The following program suppose to process live stream but I get segmentation fault after the first frame of the video. I think I need to free the cuda memory allocated after every frame but I could not find a correct way to do it.
Inference code:
void doInference(IExecutionContext & context, cudaStream_t & stream, void ** buffers, float * input, float * output, int batchSize) {
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
Main Code :
int main(int argc, char ** argv) {
cudaSetDevice(DEVICE);
// create a model using the API directly and serialize it to a stream
char * trtModelStream {
nullptr
};
size_t size {
0
};
std::string engine_name = STR2(NET);
engine_name = "best_new_640" + engine_name + ".engine";
if (argc == 2 && std::string(argv[1]) == "-s") {
IHostMemory * modelStream {
nullptr
};
APIToModel(BATCH_SIZE, & modelStream);
assert(modelStream != nullptr);
std::ofstream p(engine_name, std::ios::binary);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
p.write(reinterpret_cast <
const char * > (modelStream -> data()), modelStream -> size());
modelStream -> destroy();
return 0;
} else if (argc == 3 && std::string(argv[1]) == "-d") {
std::ifstream file(engine_name, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
}
} else {
std::cerr << "arguments not right!" << std::endl;
std::cerr << "./yolov5 -s // serialize model to plan file" << std::endl;
std::cerr << "./yolov5 -d ../samples // deserialize plan file and run inference" << std::endl;
return -1;
}
// prepare input data ---------------------------
static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
static float prob[BATCH_SIZE * OUTPUT_SIZE];
IRuntime * runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine * engine = runtime -> deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext * context = engine -> createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine -> getNbBindings() == 2);
void * buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine -> getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine -> getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
std::cout << "input index is:" << inputIndex << std::endl;
std::cout << "output index is:" << outputIndex << std::endl;
// Create GPU buffers on device
CHECK(cudaMalloc( & buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CHECK(cudaMalloc( & buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate( & stream));
/*
This part is added after.
*/
cv::VideoCapture cap("/home/nvidia/Downloads/2.mp4");
// if not success, exit program
if (cap.isOpened() == false) {
std::cout << "Cannot open the video file" << std::endl;
std::cin.get(); //wait for any key press
return -1;
}
//get the frames rate of the video
double fps = cap.get(cv::CAP_PROP_FPS);
std::cout << "Frames per seconds : " << fps << std::endl;
std::string window_name = "My First Video";
cv::namedWindow(window_name, cv::WINDOW_NORMAL); //create a window
int n = 0;
float flat_array[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
/*
Until here.
*/
while (1) {
// Run inference
cap.grab();
cv::Mat frame;
bool bSuccess = cap.read(frame);
//Breaking the while loop at the end of the video
if (bSuccess == false) {
std::cout << "Found the end of the video" << std::endl;
break;
}
if (n % 1 == 0) {
cv::Mat pr_img = preprocess_img(frame); // letterbox BGR to RGB
int i, b = 0;
cv::imshow("test array", pr_img);
size_t sizeInBytes = pr_img.total() * pr_img.elemSize();
std::cout << sizeInBytes << std::endl;
std::cout << COUNT_OF(pr_img.data) << std::endl;
//uchar* uc_pixel=nullptr;
for (int row = 0; row < INPUT_H; ++row) {
uchar * uc_pixel = pr_img.data + row * pr_img.step;
//std::cout<<"__ROW "<<row<<std::endl;
for (int col = 0; col < INPUT_W; ++col) {
data[b * 3 * INPUT_H * INPUT_W + i] = (float) uc_pixel[2] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float) uc_pixel[1] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float) uc_pixel[0] / 255.0;
uc_pixel += 3;
//std::cout<<"INPUT_W "<<col<<"ROW "<<row<<std::endl;
++i;
}
}
std::cout << "data_length" << COUNT_OF(data);
// auto start = std::chrono::system_clock::now();
doInference( * context, stream, buffers, data, prob, BATCH_SIZE);
// auto end = std::chrono::system_clock::now();
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
// std::vector<std::vector<Yolo::Detection>> batch_res(1);
// for (int b = 0; b < 1; b++) {
// auto& res = batch_res[b];
// nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
// }
// auto& res = batch_res[b];
// //std::cout << res.size() << std::endl;
// //cv::Mat img = cv::imread(std::string(argv[2]) + "/" + file_names[f - fcount + 1 + b]);
// for (size_t j = 0; j < res.size(); j++) {
// cv::Rect r = get_rect(frame, res[j].bbox);
// cv::rectangle(frame, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
// cv::putText(frame, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
// }
cv::imshow(window_name, frame);
if (cv::waitKey(10) == 27) {
std::cout << "Esc key is pressed by user. Stoppig the video" << std::endl;
break;
}
std::cout << "image processed" << std::endl;
}
n++;
}
// Release stream and buffers
// cudaStreamDestroy(stream);
// CHECK(cudaFree(buffers[inputIndex]));
// CHECK(cudaFree(buffers[outputIndex]));
// // Destroy the engine
// context->destroy();
// engine->destroy();
// runtime->destroy();
return 0;
}
Segmentation Faults where the data
buffer feeds in to the doInference
function in a video stream.