VisionWorks remap for image with 3 channels

What is the optimal way to use remap in VisionWorks for an image with 3 channels?
My application performs two conversions: from cv::Mat to vx_image and back again.

My code:
a) visionworks_graph_remapper.h

/// Remaps 3-channel 8-bit images through an OpenVX graph.
///
/// init() builds a remap table from two OpenCV coordinate maps and sets up a
/// graph that extracts the R, G and B planes, remaps each plane with bilinear
/// interpolation and recombines them. remap() then runs that graph on one image.
///
/// NOTE(review): the class owns raw OpenVX handles and releases them in its
/// destructor, so instances should not be copied — confirm no caller does.
class VisionWorksGraphRemapper
{
  // Every handle starts out null and every size starts at zero so that the
  // destructor never passes an indeterminate value to a vxRelease* call when
  // init() was not called.
  vx_context context = nullptr;
  vx_uint32 img_width = 0;
  vx_uint32 img_height = 0;
  vx_remap m_remap = nullptr;
  vx_imagepatch_addressing_t addr = {};
  vx_rectangle_t rect = {};

  vx_graph graph = nullptr;
  // Interleaved RGB endpoints of the graph.
  vx_image inputRGB = nullptr, outputRGB = nullptr;
  // Per-channel planes before remapping.
  vx_image inputR = nullptr, inputG = nullptr, inputB = nullptr;
  // Per-channel planes after remapping.
  vx_image outputR = nullptr, outputG = nullptr, outputB = nullptr;

  // Host buffer whose pixel memory is handed to inputRGB in init().
  cv::Mat input_cv_img;

public:
  VisionWorksGraphRemapper();
  ~VisionWorksGraphRemapper();

  /// Builds the remap table and the processing graph.
  /// @param rMapX per-pixel source x coordinates, read as float
  /// @param rMapY per-pixel source y coordinates, read as float
  void init(const cv::Mat &rMapX, const cv::Mat &rMapY);

  /// Runs the graph on @p src and stores a copy of the result in @p dst.
  void remap(const cv::Mat &src, cv::Mat &dst);
};

b) vision_works_graph_remapper.cpp

#include "visionworks_graph_remapper.h"

/// Creates the OpenVX context that owns every object built later in init().
VisionWorksGraphRemapper::VisionWorksGraphRemapper()
{
  context = vxCreateContext();
  // Context creation reports failure through the status of the returned
  // reference, so check it here instead of failing later on an unrelated call.
  CV_Assert(vxGetStatus(reinterpret_cast<vx_reference>(context)) == VX_SUCCESS);
}

/// Releases every OpenVX object created by this instance.
///
/// Objects are released in reverse order of creation and the context goes
/// last: once the context has been released, the handles that were created
/// from it must no longer be passed to any OpenVX call.
VisionWorksGraphRemapper::~VisionWorksGraphRemapper()
{
  // Virtual per-channel images that live inside the graph.
  vxReleaseImage(&outputB);
  vxReleaseImage(&outputG);
  vxReleaseImage(&outputR);

  vxReleaseImage(&inputB);
  vxReleaseImage(&inputG);
  vxReleaseImage(&inputR);

  vxReleaseGraph(&graph);

  // inputRGB wraps the pixel memory of input_cv_img; that member is destroyed
  // only after this destructor body has finished, so the memory is still valid.
  vxReleaseImage(&outputRGB);
  vxReleaseImage(&inputRGB);
  vxReleaseRemap(&m_remap);

  vxReleaseContext(&context);
}

void VisionWorksGraphRemapper::init(const cv::Mat &rMapX, const cv::Mat &rMapY)
{
  img_width = static_cast<vx_uint32>(rMapX.cols);
  img_height = static_cast<vx_uint32>(rMapX.rows);

// Create remap table
  m_remap = vxCreateRemap(context, img_width, img_height, img_width, img_height);
  for (vx_uint32 y = 0; y < img_height; y++) {
    for (vx_uint32 x = 0; x < img_width; x++) {
      vx_status status = vxSetRemapPoint(
        m_remap,
        x,
        y,
        (vx_float32) rMapX.at<float>(y, x),
        (vx_float32) rMapY.at<float>(y, x)
      );
      if (status) {
        std::cout << "ERROR: vxSetRemapPoint(*," << x << "," << y << ","
        << (vx_float32) rMapX.at<float>(x, y) << ","
        << (vx_float32) rMapY.at<float>(x, y) << std::endl;
      }
    }
  }

  input_cv_img = cv::Mat::zeros(img_height, img_width, CV_8UC3);
  rect = { 0u, 0u, img_width, img_height };
  // Import into VisionWorks
  addr.dim_x= img_width;
  addr.dim_y= img_height;
  addr.stride_x= 3*sizeof(vx_uint8);
  addr.stride_y= static_cast<vx_int32>(input_cv_img.step);
  void *ptrs[] = {input_cv_img.data};
  inputRGB = vxCreateImageFromHandle(context, VX_DF_IMAGE_RGB, &addr, ptrs, VX_IMPORT_TYPE_HOST);
  outputRGB = vxCreateImage(context, img_width, img_height, VX_DF_IMAGE_RGB);

  // Create the graph (necessary for creating virtual objects)
  graph = vxCreateGraph(context);

  // Create intermediate graph data objects
  inputR = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);
  inputG = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);
  inputB = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);
  outputR = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);
  outputG = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);
  outputB = vxCreateVirtualImage(graph, img_width, img_height, VX_DF_IMAGE_U8);

  // Extraction of channels
  vxChannelExtractNode(graph, inputRGB, VX_CHANNEL_R, inputR);
  vxChannelExtractNode(graph, inputRGB, VX_CHANNEL_G, inputG);
  vxChannelExtractNode(graph, inputRGB, VX_CHANNEL_B, inputB);

  // Remap each channel
  vxRemapNode(graph, inputR, m_remap, VX_INTERPOLATION_BILINEAR, outputR);
  vxRemapNode(graph, inputG, m_remap, VX_INTERPOLATION_BILINEAR, outputG);
  vxRemapNode(graph, inputB, m_remap, VX_INTERPOLATION_BILINEAR, outputB);

  // Build the output image
  vxChannelCombineNode(graph, outputR, outputG, outputB, NULL, outputRGB);

  // Graph verification
  vx_status status = vxVerifyGraph(graph);
  if (status) std::cout << status << std::endl;
  CV_Assert(status == VX_SUCCESS);
}

/// Remaps one image with the graph built by init().
///
/// @param src input image; must have the size and type of the buffer that
///            init() allocated (CV_8UC3, map-sized)
/// @param dst receives a deep copy of the remapped image
///
/// Fails a CV_Assert when @p src does not match the prepared buffer or when
/// any OpenVX call reports an error.
void VisionWorksGraphRemapper::remap(const cv::Mat &src, cv::Mat &dst)
{
  // cv::Mat::copyTo reallocates its destination on a size or type mismatch.
  // That would silently detach input_cv_img from the memory inputRGB was
  // created on, and the graph would then process stale data.
  CV_Assert(src.type() == input_cv_img.type());
  CV_Assert(src.size() == input_cv_img.size());

  // Bracket the host-side write with access/commit so OpenVX knows the
  // image contents changed.
  void *ptr = NULL;
  vx_status status = vxAccessImagePatch(inputRGB, &rect, 0, &addr, &ptr, VX_WRITE_ONLY);
  CV_Assert(status == VX_SUCCESS);
  src.copyTo(input_cv_img);
  status = vxCommitImagePatch(inputRGB, &rect, 0, &addr, ptr);
  CV_Assert(status == VX_SUCCESS);

  // Process graph
  status = vxProcessGraph(graph);
  CV_Assert(status == VX_SUCCESS);

  // Use outputRGB: clone so that dst does not alias the mapped image memory
  // once the mapper goes out of scope.
  VXImageToCVMatMapper mapper(outputRGB);
  dst = mapper.getMat().clone();
}

Hi dborisoglebskiy,

We see one inefficiency, and have one suggestion that would give better performance (but requires an algorithmic tradeoff):

1. Get rid of extra copies.
a. Instead of explicitly copying the input and output images in your remap function, use createVXImageFromCVMat to get vx_image objects that share the same memory as the input/output cv::Mat objects.
b. Then set these vx_image objects as the corresponding parameters of the graph nodes using vxSetParameterByIndex (see the feature_tracker demo source code for a good example of this).
2. Unfortunately, with the current OpenVX API there is no way to avoid splitting the channels in order to do Remap. However, if you can get by with a perspective or affine warp instead, that would be much faster and there would be no need to split channels.

Thanks