How to perform fish-eye lens distortion correction in gstreamer pipeline? (Hfov ~150)

I cannot comment on NVIDIA’s evaluation, but it should not be that difficult to apply the correction on the GPU once you have the OpenCV correction maps for x and y in float format.

You would first need an opencv version built with CUDA support. Here I’ve been using a 4.2.0 dev version.

This example is a simplified version of nvivafilter plugin. Its sources are available in public_sources.tbz2.

Basically, this example uses a constant 640x480 resolution, so you would declare these constants and variables:

#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/cudawarping.hpp" 

const int max_width = 640;
const int max_height = 480;
static cv::cuda::GpuMat gpu_xmap, gpu_ymap;

In the init() function, you would set up your xmap and ymap (load your own maps in whatever way suits you):

/* Tutorial excerpt of init(): registers the three callbacks, builds the
   fisheye undistortion maps on the CPU and uploads them to the GPU. */
init (CustomerFunction * pFuncs)
{
  /* Hook the callbacks into the function table handed in by the caller. */
  pFuncs->fPreProcess = pre_process;
  pFuncs->fGPUProcess = gpu_process;
  pFuncs->fPostProcess = post_process;

  /* Initialize maps from CPU: one float per pixel for x and for y. */
  cv::Mat xmap(max_height, max_width, CV_32FC1);
  cv::Mat ymap(max_height, max_width, CV_32FC1);

  /* Fill the matrices with your own calibration values. */
  /* 3x3 camera matrix, written row by row. */
  cv::Mat cam(3, 3, cv::DataType<float>::type);
  cam.at<float>(0, 0) = 528.53618582196384f;
  cam.at<float>(0, 1) = 0.0f;
  cam.at<float>(0, 2) = 314.01736116032430f;

  cam.at<float>(1, 0) = 0.0f;
  cam.at<float>(1, 1) = 532.01912214324500f;
  cam.at<float>(1, 2) = 231.43930864205211f;

  cam.at<float>(2, 0) = 0.0f;
  cam.at<float>(2, 1) = 0.0f;
  cam.at<float>(2, 2) = 1.0f;

  /* 4x1 vector of distortion coefficients. */
  cv::Mat dist(4, 1, cv::DataType<float>::type);  
  dist.at<float>(0, 0) = -0.11839989180635836f;
  dist.at<float>(1, 0) = 0.25425420873955445f;
  dist.at<float>(2, 0) = 0.0013269901775205413f;
  dist.at<float>(3, 0) = 0.0015787467748277866f;

  /* No rectification rotation (empty Mat); the same camera matrix is reused
     as the new camera matrix. */
  cv::fisheye::initUndistortRectifyMap(cam, dist, cv::Mat(), cam, cv::Size(max_width, max_height), CV_32FC1, xmap, ymap);

  /* upload to GpuMats */
  gpu_xmap.upload(xmap);
  gpu_ymap.upload(ymap);
}

Once this is done, it’s ready for remapping frames. You would process each frame this way:

/* Undistorts one RGBA frame in place on the GPU using gpu_xmap / gpu_ymap.
   pdata is wrapped (not copied) in a GpuMat of height x width CV_8UC4 pixels. */
static void cv_process_RGBA(void *pdata, int32_t width, int32_t height)
{
    /* NOTE(review): no row step is given, so rows are presumably assumed to be
       tightly packed (width * 4 bytes) - confirm against the surface pitch. */
    cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, pdata);
    cv::cuda::GpuMat d_Mat_RGBA_Src;
    d_Mat_RGBA.copyTo(d_Mat_RGBA_Src); // cannot avoid one copy
    /* Remap from the copy back into the wrapped frame; pixels mapped from
       outside the source are filled with the constant (0, 0, 0, 0). */
    cv::cuda::remap(d_Mat_RGBA_Src, d_Mat_RGBA, gpu_xmap, gpu_ymap, cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0.f, 0.f, 0.f, 0.f));

    // Check that the output header still points at the caller's buffer
    if(d_Mat_RGBA.data != pdata)
	std::cerr << "Error reallocated buffer for d_Mat_RGBA" << std::endl;
}

The last step is to call this processing whenever an RGBA (or ABGR) frame is received. In the gpu_process() function, you would change the relevant section to:

  /* Excerpt from gpu_process(): dispatch on the frame type and color format. */
  if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
    if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_ABGR) {
 	/* Supported format: undistort plane 0 in place. */
 	cv_process_RGBA(eglFrame.frame.pPitch[0], eglFrame.width, eglFrame.height);
    } else if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
      /* NV12 frames are reported and left unprocessed. */
      printf ("Invalid eglcolorformat NV12\n");
    } else
      /* Any other format is reported by its numeric code. */
      printf ("Invalid eglcolorformat %d\n", eglFrame.eglColorFormat);
  }

Note that in older L4T releases the format was CU_EGL_COLOR_FORMAT_BGRA instead, and the numeric codes changed as well, so a binary built for one release is not compatible with another.

Adapt the Makefile to your OpenCV install directory:

CVCCFLAGS:=-I$(OPENCV_DIR)/include/opencv4
CVLDFLAGS:=-L$(OPENCV_DIR)/lib -lopencv_core -lopencv_calib3d  -lopencv_cudawarping

Build with make and test with:

export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$OPENCV_DIR/lib
gst-launch-1.0 videotestsrc ! video/x-raw, width=640, height=480, framerate=30/1 ! nvvidconv ! 'video/x-raw(memory:NVMM), format=NV12, width=640, height=480' ! nvivafilter customer-lib-name=./lib-gst-custom-opencv_cudaprocess.so cuda-process=true ! 'video/x-raw(memory:NVMM), format=RGBA, width=640, height=480' ! nvoverlaysink
Attachments

Main source to be saved as gst-custom-opencv_cudaprocess.cu:

/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <iostream>

#include <cuda.h>

#include "opencv2/core.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/cudawarping.hpp" 

#include "cudaEGL.h"

#if defined(__cplusplus)
extern "C" void Handle_EGLImage (EGLImageKHR image);
extern "C" {
#endif

/**
  * Surface format identifiers, passed to the pre/post-process callbacks
  * through their sformat array (one entry per surface).
  * The numeric values start at 0 and follow declaration order.
  */
typedef enum {
  COLOR_FORMAT_Y8 = 0,
  COLOR_FORMAT_U8_V8,
  COLOR_FORMAT_RGBA,
  COLOR_FORMAT_NONE
} ColorFormat;

/**
  * Table of callbacks that init() fills in.
  * The member order is part of the interface and must not be changed.
  */
typedef struct {
  /**
  * cuda-process API
  *
  * @param image   : EGL image to process
  * @param userPtr : pointer to user-allocated data; the user is responsible
  *                  for freeing it
  */
  void (*fGPUProcess) (EGLImageKHR image, void ** userPtr);

  /**
  * pre-process API
  *
  * @param sBaseAddr  : mapped surface (YUV) pointers
  * @param smemsize   : surfaces size array
  * @param swidth     : surfaces width array
  * @param sheight    : surfaces height array
  * @param spitch     : surfaces pitch array
  * @param sformat    : surfaces format array
  * @param nsurfcount : surfaces count
  * @param userPtr    : pointer to user-allocated data; the user is
  *                     responsible for freeing it
  */
  void (*fPreProcess)(void **sBaseAddr,
                      unsigned int *smemsize,
                      unsigned int *swidth,
                      unsigned int *sheight,
                      unsigned int *spitch,
                      ColorFormat *sformat,
                      unsigned int nsurfcount,
                      void ** userPtr);

  /**
  * post-process API
  *
  * @param sBaseAddr  : mapped surface (YUV) pointers
  * @param smemsize   : surfaces size array
  * @param swidth     : surfaces width array
  * @param sheight    : surfaces height array
  * @param spitch     : surfaces pitch array
  * @param sformat    : surfaces format array
  * @param nsurfcount : surfaces count
  * @param userPtr    : pointer to user-allocated data; the user is
  *                     responsible for freeing it
  */
  void (*fPostProcess)(void **sBaseAddr,
                      unsigned int *smemsize,
                      unsigned int *swidth,
                      unsigned int *sheight,
                      unsigned int *spitch,
                      ColorFormat *sformat,
                      unsigned int nsurfcount,
                      void ** userPtr);
} CustomerFunction;

void init (CustomerFunction * pFuncs);

#if defined(__cplusplus)
}
#endif


/**
  * Dummy custom pre-process API implementation.
  * It only logs the width, height and byte size of the first mapped surface;
  * the pixel data itself is left untouched.
  *
  * @param sBaseAddr  : Mapped Surfaces pointers (unused)
  * @param smemsize   : surfaces size array
  * @param swidth     : surfaces width array
  * @param sheight    : surfaces height array
  * @param spitch     : surfaces pitch array (unused)
  * @param sformat    : surfaces format array (unused)
  * @param nsurfcount : surfaces count
  * @param usrptr     : user data pointer (unused)
  */
static void
pre_process (void **sBaseAddr,
                unsigned int *smemsize,
                unsigned int *swidth,
                unsigned int *sheight,
                unsigned int *spitch,
                ColorFormat  *sformat,
                unsigned int nsurfcount,
                void ** usrptr)
{
  /* With no surface (or a missing array) there is no first element to read. */
  if (nsurfcount == 0 || !smemsize || !swidth || !sheight)
    return;

  /* add your custom pre-process here */

  /* The values are unsigned, so they are printed with %u rather than %d. */
  printf ("pre-process %ux%u size %u\n", swidth[0], sheight[0], smemsize[0]);
}

/**
  * Dummy custom post-process API implementation.
  * It only logs the width, height and byte size of the first mapped surface;
  * the pixel data itself is left untouched.
  *
  * @param sBaseAddr  : Mapped Surfaces pointers (unused)
  * @param smemsize   : surfaces size array
  * @param swidth     : surfaces width array
  * @param sheight    : surfaces height array
  * @param spitch     : surfaces pitch array (unused)
  * @param sformat    : surfaces format array (unused)
  * @param nsurfcount : surfaces count
  * @param usrptr     : user data pointer (unused)
  */
static void
post_process (void **sBaseAddr,
                unsigned int *smemsize,
                unsigned int *swidth,
                unsigned int *sheight,
                unsigned int *spitch,
                ColorFormat  *sformat,
                unsigned int nsurfcount,
                void ** usrptr)
{
  /* With no surface (or a missing array) there is no first element to read. */
  if (nsurfcount == 0 || !smemsize || !swidth || !sheight)
    return;

  /* add your custom post-process here */

  /* The values are unsigned, so they are printed with %u rather than %d. */
  printf ("post-process %ux%u size %u\n", swidth[0], sheight[0], smemsize[0]);
}



static cv::cuda::GpuMat gpu_xmap, gpu_ymap;

/**
  * Undistorts one RGBA frame in place on the GPU.
  *
  * The frame is wrapped (not copied) in a GpuMat, duplicated into a temporary
  * source image, then remapped back into the original buffer through the
  * gpu_xmap / gpu_ymap lookup tables.
  *
  * @param pdata  : device pointer to the first pixel of the frame
  * @param width  : frame width in pixels
  * @param height : frame height in pixels
  * @param pitch  : bytes per row of the frame; when omitted the rows are
  *                 taken to be tightly packed
  */
static void cv_process_RGBA(void *pdata, int32_t width, int32_t height,
                            size_t pitch = cv::Mat::AUTO_STEP)
{
    /* remap() sizes its output from the maps: with a different frame size it
       would allocate a fresh output instead of writing into pdata, and with
       empty maps it would fail outright. Leave such frames untouched. */
    if (gpu_xmap.empty() || gpu_ymap.empty()
        || gpu_xmap.size() != gpu_ymap.size()
        || gpu_xmap.cols != width || gpu_xmap.rows != height) {
        std::cerr << "Undistortion maps do not match frame size " << width
                  << "x" << height << ", frame left unchanged" << std::endl;
        return;
    }

    /* Wrap the caller's buffer, honouring its row pitch. */
    cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, pdata, pitch);
    cv::cuda::GpuMat d_Mat_RGBA_Src;
    d_Mat_RGBA.copyTo(d_Mat_RGBA_Src); // cannot avoid one copy: remap is not in-place
    cv::cuda::remap(d_Mat_RGBA_Src, d_Mat_RGBA, gpu_xmap, gpu_ymap, cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0.f, 0.f, 0.f, 0.f));

    // Check that the result really landed in the caller's buffer
    if(d_Mat_RGBA.data != pdata)
        std::cerr << "Error reallocated buffer for d_Mat_RGBA" << std::endl;
}



/**
  * Performs CUDA Operations on egl image.
  *
  * Registers the EGL image with CUDA, fetches its mapped frame, runs the
  * undistortion on ABGR pitch frames, then unregisters the resource.
  *
  * @param image  : EGL image
  * @param usrptr : user data pointer (unused)
  */
static void
gpu_process (EGLImageKHR image, void ** usrptr)
{
  CUresult status;
  CUeglFrame eglFrame;
  CUgraphicsResource pResource = NULL;

  cudaFree(0);
  status = cuGraphicsEGLRegisterImage(&pResource, image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);

  if (status != CUDA_SUCCESS) {
    printf("cuGraphicsEGLRegisterImage failed : %d \n", status);
    return;
  }

  status = cuGraphicsResourceGetMappedEglFrame( &eglFrame, pResource, 0, 0);
  if (status != CUDA_SUCCESS) {
    /* eglFrame was not filled in: skip the processing but still fall through
       to the unregister call below so the resource is not leaked. */
    printf ("cuGraphicsResourceGetMappedEglFrame failed : %d \n", status);
  } else {
    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS) {
      printf ("cuCtxSynchronize failed \n");
    }

    if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
      if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_ABGR) {
        /* This function is called through a C function pointer, so an OpenCV
           exception must not be allowed to propagate out of it. */
        try {
          cv_process_RGBA(eglFrame.frame.pPitch[0], eglFrame.width, eglFrame.height, eglFrame.pitch);
        } catch (const cv::Exception & e) {
          std::cerr << "cv_process_RGBA failed: " << e.what() << std::endl;
        }
      } else if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
        printf ("Invalid eglcolorformat NV12\n");
      } else
        printf ("Invalid eglcolorformat %d\n", eglFrame.eglColorFormat);
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS) {
      printf ("cuCtxSynchronize failed after memcpy \n");
    }
  }

  status = cuGraphicsUnregisterResource(pResource);
  if (status != CUDA_SUCCESS) {
    printf("cuGraphicsEGLUnRegisterResource failed: %d \n", status);
  }
}

const int max_width = 640;
const int max_height = 480;

/**
  * Fills in the callback table, then precomputes the fisheye undistortion
  * maps on the CPU and uploads them to gpu_xmap / gpu_ymap.
  *
  * @param pFuncs : callback table to populate
  */
extern "C" void
init (CustomerFunction * pFuncs)
{
  pFuncs->fPreProcess = pre_process;
  pFuncs->fGPUProcess = gpu_process;
  pFuncs->fPostProcess = post_process;

  const cv::Size frame_size(max_width, max_height);

  /* 3x3 camera matrix, row by row */
  const cv::Mat cam = (cv::Mat_<float>(3, 3) <<
      528.53618582196384f, 0.0f,                314.01736116032430f,
      0.0f,                532.01912214324500f, 231.43930864205211f,
      0.0f,                0.0f,                1.0f);

  /* 4x1 distortion coefficients */
  const cv::Mat dist = (cv::Mat_<float>(4, 1) <<
      -0.11839989180635836f,
      0.25425420873955445f,
      0.0013269901775205413f,
      0.0015787467748277866f);

  /* Host-side maps: one float per pixel for each axis */
  cv::Mat xmap(frame_size, CV_32FC1);
  cv::Mat ymap(frame_size, CV_32FC1);

  /* No rectification rotation; the camera matrix doubles as the new one */
  cv::fisheye::initUndistortRectifyMap(cam, dist, cv::Mat(), cam, frame_size, CV_32FC1, xmap, ymap);

  /* Make the maps available to the per-frame GPU remap */
  gpu_xmap.upload(xmap);
  gpu_ymap.upload(ymap);
}

extern "C" void
deinit (void)
{

}

Makefile:

###############################################################################
#
# Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
###############################################################################

# Location of the CUDA Toolkit (can be overridden from the environment or
# the make command line)
CUDA_PATH ?= /usr/local/cuda
# System include and library directories for a 64-bit ARM (aarch64) target
INCLUDE_DIR = /usr/include
LIB_DIR = /usr/lib/aarch64-linux-gnu
TEGRA_LIB_DIR = /usr/lib/aarch64-linux-gnu/tegra
# Install prefix of the CUDA-enabled OpenCV build; adapt to your system
OPENCV_DIR = /usr/local/opencv-github-4.2.0-dev

# For hardfp
#LIB_DIR = /usr/lib/arm-linux-gnueabihf
#TEGRA_LIB_DIR = /usr/lib/arm-linux-gnueabihf/tegra

# Kernel name from uname, upper-cased and lower-cased
OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]")
OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")

# Word size and machine architecture derived from uname -m.
# OS_ARCH is tested further down to choose the code generation flags.
OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/")
OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/")

# Host compiler, and nvcc driving it through -ccbin
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)

# internal flags
NVCCFLAGS   := --shared -std=c++11
CCFLAGS     := -fPIC  -std=c++11
# OpenCV headers and the three OpenCV libraries the source links against
CVCCFLAGS:=-I$(OPENCV_DIR)/include/opencv4
CVLDFLAGS:=-L$(OPENCV_DIR)/lib -lopencv_core -lopencv_calib3d -lopencv_cudawarping

LDFLAGS     :=

# Extra user flags
EXTRA_NVCCFLAGS   ?=
EXTRA_LDFLAGS     ?=
EXTRA_CCFLAGS     ?=

# 64-bit ARM ABI settings
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1

# For hardfp
#override abi := gnueabihf
#LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
#CCFLAGS += -mfloat-abi=hard

# Optional ARMv7 build (ARMv7=1); when TARGET_FS is set it is used as the
# sysroot and as the base of the rpath-link directories
ifeq ($(ARMv7),1)
NVCCFLAGS += -target-cpu-arch ARM
ifneq ($(TARGET_FS),)
CCFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/$(abi)-linux-gnu

# For hardfp
#LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi)

endif
endif

# Debug build flags: set dbg to 1 to add host and device debug information
dbg = 0
ifeq ($(dbg),1)
      NVCCFLAGS += -g -G
      TARGET := debug
else
      TARGET := release
endif

# Compile flags: nvcc flags as-is, host compiler flags each prefixed with
# -Xcompiler so nvcc forwards them
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))

# Link flags: the compile flags plus linker flags each prefixed with -Xlinker
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))

# Common includes and paths for CUDA
INCLUDES  := -I./
# EGL / GLES from the system library directory, the CUDA driver library from
# the Tegra one
LIBRARIES := -L$(LIB_DIR) -lEGL -lGLESv2
LIBRARIES += -L$(TEGRA_LIB_DIR) -lcuda -lrt

################################################################################

# CUDA code generation flags
# The SM10..SM50 / SMXX definitions are kept for reference; only the ones
# listed in GENCODE_FLAGS below are actually passed to nvcc.
ifneq ($(OS_ARCH),armv7l)
GENCODE_SM10    := -gencode arch=compute_10,code=sm_10
endif
GENCODE_SM20    := -gencode arch=compute_20,code=sm_20
GENCODE_SM30    := -gencode arch=compute_30,code=sm_30
GENCODE_SM32    := -gencode arch=compute_32,code=sm_32
GENCODE_SM35    := -gencode arch=compute_35,code=sm_35
GENCODE_SM50    := -gencode arch=compute_50,code=sm_50
GENCODE_SMXX    := -gencode arch=compute_50,code=compute_50
# for TX1 / Nano
GENCODE_SM53    := -gencode arch=compute_53,code=compute_53
# for TX2
GENCODE_SM62    := -gencode arch=compute_62,code=compute_62
# for Xavier
GENCODE_SM72    := -gencode arch=compute_72,code=compute_72

ifeq ($(OS_ARCH),armv7l)
# This only supports TK1(3.2) -like architectures
GENCODE_FLAGS   ?= $(GENCODE_SM32)
else
# This only supports TX1/Nano(5.3) or TX2(6.2) or Xavier(7.2) -like architectures
# (the variable name was previously misspelled GEGENCODE_SM53, which expanded
# to nothing and silently dropped the TX1 / Nano target)
GENCODE_FLAGS   ?= $(GENCODE_SM53) $(GENCODE_SM62) $(GENCODE_SM72)
endif

# Target rules
# These targets do not produce files of the same name, so declare them phony:
# a stray file called "clean" or "build" must not disable them.
.PHONY: all build clean clobber

all: build

build: lib-gst-custom-opencv_cudaprocess.so

# Compile the CUDA/OpenCV source into an object file
gst-custom-opencv_cudaprocess.o : gst-custom-opencv_cudaprocess.cu
	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(CVCCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the shared library loaded by nvivafilter
lib-gst-custom-opencv_cudaprocess.so : gst-custom-opencv_cudaprocess.o
	$(NVCC) $(ALL_LDFLAGS) $(CVLDFLAGS) $(GENCODE_FLAGS) -o $@ $^ $(LIBRARIES)

# -f: cleaning an already clean tree is not an error
clean:
	rm -f lib-gst-custom-opencv_cudaprocess.so gst-custom-opencv_cudaprocess.o

clobber: clean
4 Likes