Yes, the measured time is only for the remap algorithm.
Sure, here is an example:
I create the maps with OpenCV, as you can see in this function:
void RectifyUtils::generate_map() {
// create undistortion and rectification maps for left and right camera
cv::Size image_size(_input_frame_width, _input_frame_height);
cv::stereoRectify(_M1, _D1, _M2, _D2, image_size, _R, _T, _R1, _R2, _P1, _P2, _Q,
cv::CALIB_ZERO_DISPARITY, 1.0, image_size, &_validRoi[0], &_validRoi[1]);
cv::initUndistortRectifyMap(_M1, _D1, _R1, _P1, image_size, CV_32FC1, _map[0][0], _map[0][1]);
cv::initUndistortRectifyMap(_M2, _D2, _R2, _P2, image_size, CV_32FC1, _map[1][0], _map[1][1]);
memset(&_warp_map_m1, 0, sizeof(_warp_map_m1));
memset(&_warp_map_m2, 0, sizeof(_warp_map_m2));
_warp_map_m1.grid.numHorizRegions = 1;
_warp_map_m1.grid.numVertRegions = 1;
_warp_map_m1.grid.regionWidth[0] = _input_frame_width;
_warp_map_m1.grid.regionHeight[0] = _input_frame_height;
_warp_map_m1.grid.horizInterval[0] = 1;
_warp_map_m1.grid.vertInterval[0] = 1;
_warp_map_m2.grid.numHorizRegions = 1;
_warp_map_m2.grid.numVertRegions = 1;
_warp_map_m2.grid.regionWidth[0] = _input_frame_width;
_warp_map_m2.grid.regionHeight[0] = _input_frame_height;
_warp_map_m2.grid.horizInterval[0] = 1;
_warp_map_m2.grid.vertInterval[0] = 1;
vpiWarpMapAllocData(&_warp_map_m1);
check_status(vpiWarpMapGenerateIdentity(&_warp_map_m1), "vpiWarpMapGenerateIdentity");
vpiWarpMapAllocData(&_warp_map_m2);
check_status(vpiWarpMapGenerateIdentity(&_warp_map_m2), "vpiWarpMapGenerateIdentity");
// fill up maps for master and slave transformation
for (int i = 0; i < _map[0][0].rows; i++)
{
VPIKeypoint *row_master = (VPIKeypoint *)((uint8_t *)_warp_map_m1.keypoints + _warp_map_m1.pitchBytes * i);
VPIKeypoint *row_slave = (VPIKeypoint *)((uint8_t *)_warp_map_m2.keypoints + _warp_map_m2.pitchBytes * i);
int j;
for (j = 0; j < _map[0][0].cols; j++)
{
row_master[j].x = _map[1][0].at<float>(i,j);
row_master[j].y = _map[1][1].at<float>(i,j);
row_slave[j].x = _map[0][0].at<float>(i,j);
row_slave[j].y = _map[0][1].at<float>(i,j);
}
}
check_status(vpiCreateRemap(VPI_BACKEND_CUDA, &_warp_map_m1, &_warp_m1), "vpiCreateRemap");
check_status(vpiCreateRemap(VPI_BACKEND_CUDA, &_warp_map_m2, &_warp_m2), "vpiCreateRemap");
}
This is how I wrap the EGL image as a VPI image:
(Note: FrameContainer is a struct containing an NvBufSurface and an NvDsFrameMeta.)
/**
 * Map every frame of the batch as an EGL image and wrap it as a VPI image.
 *
 * For each entry n of frame_cont the surface is mapped with
 * NvBufSurfaceMapEglImage and the resulting EGL image is either wrapped into a
 * new VPI image (first use of slot n) or swapped into the existing wrapper in
 * _vct_input_imgs[n]. Frames whose meta source_id is 0 are tagged as master
 * (source_id 0) and use surface index 0; all other frames are tagged as slave
 * (source_id 1) and use surface index n.
 *
 * The same surface index is used for mapping and for reading the mapped EGL
 * image, so the wrapper is never created from an entry that was not mapped.
 *
 * @param frame_cont batch of frame containers to wrap; must not be null.
 * @return number of frames processed, or -1 if mapping an EGL image failed.
 */
int RectifyUtils::frame_cont_to_vpi(std::vector<FrameContainer> *frame_cont)
{
    int temp_batch_size = 0;
    for (size_t n = 0; n < frame_cont->size(); n++) {
        auto &frame = frame_cont->at(n).data.nvds;

        const bool is_master = (frame.meta->source_id == 0);
        // Master frames always use surface 0, slave frames use surface n.
        const size_t surface_idx = is_master ? 0 : n;

        // map frame as egl image
        if (NvBufSurfaceMapEglImage(frame.surface, static_cast<int>(surface_idx)))
        {
            GST_ERROR("Failed to map EGLImage");
            return -1;
        }

        auto &input = _vct_input_imgs.at(n);
        input.source_id = is_master ? 0 : 1;

        // Read the EGL image from the very entry that was just mapped.
        const auto egl_image = frame.surface->surfaceList[surface_idx].mappedAddr.eglImage;
        if (!input.is_wrapper_created)
        {
            check_status(vpiImageCreateEGLImageWrapper(egl_image, nullptr,
                                                       VPI_BACKEND_CUDA | VPI_BACKEND_VIC, &input.img),
                         "vpiImageCreateEGLImageWrapper");
            input.is_wrapper_created = true;
        } else {
            // Reuse the existing wrapper and only redirect it to the new EGL image.
            check_status(vpiImageSetWrappedEGLImage(input.img, egl_image), "vpiImageSetWrappedEGLImage");
        }
        temp_batch_size++;
    }
    return temp_batch_size;
}
And this is how I run the Remap algorithm:
(Note: this function takes about 4.5–5.5 ms; _current_batch_size is 2.)
/**
 * Submit one remap per frame of the current batch and wait for completion.
 *
 * Frames tagged with source_id 0 are remapped with _warp_m1, all others with
 * _warp_m2. The output goes to the matching slot of _vct_rectified_imgs, which
 * also inherits the input's source_id. All remaps are queued on _cuda_stream
 * and the function blocks on vpiStreamSync before returning; the sync status
 * is passed to check_status so failures of the queued work are not lost.
 */
void RectifyUtils::undistort_rectify()
{
    for (uint8_t i = 0; i < _current_batch_size; i++)
    {
        auto &input = _vct_input_imgs.at(i);
        auto &rectified = _vct_rectified_imgs.at(i);

        // Pick the remap payload that belongs to this frame's camera.
        const auto warp = (input.source_id == 0) ? _warp_m1 : _warp_m2;
        check_status(vpiSubmitRemap(_cuda_stream, VPI_BACKEND_CUDA, warp, input.img, rectified.img,
                                    VPI_INTERP_LINEAR, VPI_BORDER_ZERO, 0),
                     "vpiSubmitRemap");
        rectified.source_id = input.source_id;
    }
    // Submission is asynchronous; errors from the queued work surface here.
    check_status(vpiStreamSync(_cuda_stream), "vpiStreamSync");
}