How can I use nppiYUV422ToRGB_8u_C2C3R() in an expected way?

Hello,
I am trying to implement a CUDA application that converts a packed UYVY image into a 3-channel packed RGB image stored in a cv::Mat.
So I tried with the code below:

#include <iterator>
#include <fstream>
#include <iostream>
#include "opencv2/opencv.hpp"
#include "nppdefs.h"
#include "nppi_support_functions.h"
#include "nppi_color_conversion.h"

int main()
{
  cv::Size raw_size(1920, 1080);
  unsigned char *raw_data = new unsigned char[raw_size.width * raw_size.height * 2]; 
  cv::Mat raw_mat(raw_size, CV_8UC2, raw_data);

  std::string raw_file("camid_1_count_1.raw");
  std::ifstream raw_stream;
  raw_stream.open(raw_file, std::ios::in | std::ios::binary);

  if (!raw_stream.is_open())
  {
    std::cerr << "[ERROR] cannot open the raw file " << raw_file << std::endl;
    std::cerr << std::endl;
    assert(0);
  }
  raw_stream.read((char*)raw_data, sizeof(unsigned char)*raw_size.width*raw_size.height*2);
  raw_stream.close();

  cv::Mat rgb_mat;
  cv::cuda::GpuMat gpu_raw_mat;
  int step;
  Npp8u* gpu_rgb_data = nppiMalloc_8u_C3(raw_size.width, raw_size.height, &step);

  gpu_raw_mat.upload(raw_mat);
  NppiSize ROI = {raw_size.width, raw_size.height};
  NppStatus status = NppStatus::NPP_NO_ERROR;//0
  try {
    std::cout << "gpu_raw_mat.step: " << gpu_raw_mat.step << std::endl;
    status = nppiYUV422ToRGB_8u_C2C3R(gpu_raw_mat.ptr<Npp8u>(),
                                      //3840,
                                      gpu_raw_mat.step,    // <<< really?
                                      gpu_rgb_data,
                                      step,
                                      ROI);
  } catch( cv::Exception& e ) { 
    const char* err_msg = e.what();
    std::cout << "exception caught #1: " << err_msg << std::endl;
  }
  std::cout << "NppStatus : " << status << std::endl;

  cv::cuda::GpuMat gpu_rgb_mat(raw_size.height, raw_size.width, CV_8UC3, gpu_rgb_data);

  std::cout << "[BEFORE]" << std::endl;
  std::cout << "depth : " << rgb_mat.depth() << std::endl;
  std::cout << "channels : " << rgb_mat.channels() << std::endl;
  std::cout << "elemSize : " << rgb_mat.elemSize() << std::endl;
  std::cout << "step1 : " << rgb_mat.step1() << std::endl;
  std::cout << "type : " << rgb_mat.type() << std::endl;

  gpu_rgb_mat.download(rgb_mat);

  std::cout << "[AFTER]" << std::endl;
  std::cout << "depth : " << rgb_mat.depth() << std::endl;
  std::cout << "channels : " << rgb_mat.channels() << std::endl;
  std::cout << "elemSize : " << rgb_mat.elemSize() << std::endl;
  std::cout << "step1 : " << rgb_mat.step1() << std::endl;
  std::cout << "type : " << rgb_mat.type() << std::endl;

  try {
    cv::imshow("RGB Image", rgb_mat);
  } catch( cv::Exception& e ) {
    const char* err_msg = e.what();
    std::cout << "exception caught #2: " << err_msg << std::endl;
  }

  cv::waitKey(0);

  delete[] raw_data;

  return 0;

}

When I run the code above, it displays an image, but the image is not correct: it is completely scrambled, so something must be wrong. It prints the following output:

gpu_raw_mat.step: 4096
NppStatus : 0
[BEFORE]
depth : 0
channels : 1
elemSize : 0
step1 : 0
type : 0
[AFTER]
depth : 0
channels : 3
elemSize : 3
step1 : 5760
type : 16

I don’t know what I did wrong…
I’d like someone to point out what is wrong, and to show me the expected way to write correct code that uses nppiYUV422ToRGB_8u_C2C3R().

Thank you very much!

Thank you very much!!