Hello, I have been using the Xavier for a project where we need to encode raw BGRA data
into an H.264/H.265 video stream. I took the NvVideoEncoder from the Jetson Multimedia API and used
it inside my program. (Maybe this is a more general CUDA coding issue, but I am working
on this piece of hardware and was wondering whether I have misinterpreted something.)
The whole program runs without problems; only the color conversion from BGRA to YUV420 planar does not
give the desired result. The Y transform seems to go well: I can see the image, but no distinct
color is present. The image looks grayish.
The conversion is done in a CUDA kernel that uses an RGBA surface object as the source and a CUeglFrame
as the destination.
This is the part where I pass the parameters to my function, which launches the CUDA kernel.
The CUeglFrame comes from an EGLImageKHR, which in turn comes from an fd buffer.
// Only pitch-linear EGL frames are converted; any other frame type is left untouched here.
if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH)
{
    // The three plane base pointers of the mapped frame, in Y, U, V order.
    uint8_t *const planeY = static_cast<uint8_t *>(eglFrame.frame.pPitch[0]);
    uint8_t *const planeU = static_cast<uint8_t *>(eglFrame.frame.pPitch[1]);
    uint8_t *const planeV = static_cast<uint8_t *>(eglFrame.frame.pPitch[2]);

    // NOTE(review): a single pitch value is forwarded for all planes — confirm that it
    // also describes the U and V planes of this buffer.
    convert_rgb_to_yuv(frame.buffer,
                       planeY, planeU, planeV,
                       eglFrame.pitch, eglFrame.width, eglFrame.height, stream(0));
}
I have checked the values of the eglFrame: the width and height are correct, and the pitch is the same
as the width. The pixel format is CU_EGL_COLOR_FORMAT_YUV420_PLANAR, which should give me three planes/surfaces
where each surface has its own color channel.
The function and kernel are:
/**
 * Computes the 8-bit luma value of one RGB pixel using fixed-point integer arithmetic.
 *
 * The weights (66, 129, 25), the rounding term 128, the shift by 8 and the offset 16
 * appear to be the usual 8-bit fixed-point approximation of limited-range BT.601 luma;
 * inputs 0..255 therefore map to outputs 16..235.
 *
 * Callable from both host and device code so that a CPU reference can share the exact
 * same arithmetic as the kernel.
 *
 * @param r  red component
 * @param g  green component
 * @param b  blue component
 * @param y  receives the luma value
 */
inline __host__ __device__ void rgb_to_y(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t &y)
{
    // Widen to int first: the weighted sum exceeds the range of uint8_t.
    const int weighted = 66 * static_cast<int>(r) + 129 * static_cast<int>(g) + 25 * static_cast<int>(b);
    // "+ 128" rounds to nearest before the divide-by-256 shift.
    y = static_cast<uint8_t>(((weighted + 128) >> 8) + 16);
}
/**
 * Converts one RGB pixel to 8-bit Y, U and V using fixed-point integer arithmetic.
 *
 * Luma is delegated to rgb_to_y(). Both chroma values are a weighted sum of the
 * components, rounded (+128), divided by 256 (>> 8) and re-centred around 128.
 *
 * @param r  red component
 * @param g  green component
 * @param b  blue component
 * @param y  receives the luma value
 * @param u  receives the first chroma value (blue-weighted)
 * @param v  receives the second chroma value (red-weighted)
 */
inline __device__ void rgb_to_yuv(const uint8_t r, const uint8_t g, const uint8_t b, uint8_t &y, uint8_t &u, uint8_t &v)
{
    rgb_to_y(r, g, b, y);

    // Work in int: the weighted sums can be negative and exceed 8 bits.
    const int red = static_cast<int>(r);
    const int green = static_cast<int>(g);
    const int blue = static_cast<int>(b);

    const int chromaU = (-38 * red - 74 * green + 112 * blue + 128) >> 8;
    const int chromaV = (112 * red - 94 * green - 18 * blue + 128) >> 8;

    u = static_cast<uint8_t>(chromaU + 128);
    v = static_cast<uint8_t>(chromaV + 128);
}
/**
 * Converts a 4-channel surface to three separate Y, U and V planes with 2x2 chroma
 * subsampling (YUV 4:2:0 planar).
 *
 * Layout: a 2D launch in which each thread owns one 2x2 block of source pixels. It
 * writes four luma samples and one U and one V sample. The chroma samples are the
 * rounded average of the four per-pixel chroma values of the block, so colour detail
 * from all four pixels contributes instead of only the top-left one.
 *
 * Threads whose 2x2 block is not completely inside the image return without writing,
 * so with an odd width or height the last column or row is left untouched.
 *
 * @param src       surface read as uchar4; channels .x/.y/.z are passed on as
 *                  blue/green/red
 * @param dstY      base of the luma plane, addressed with a row stride of dstPitch bytes
 * @param dstU      base of the U plane, addressed with a row stride of dstPitch / 2 bytes
 * @param dstV      base of the V plane, addressed with a row stride of dstPitch / 2 bytes
 * @param dstPitch  row stride of the luma plane in bytes
 * @param width     image width in pixels
 * @param height    image height in pixels
 *
 * NOTE(review): the chroma row stride is assumed to be exactly dstPitch / 2. Verify this
 * against the real plane layout of the destination buffer; a different chroma stride
 * would scatter the U/V samples.
 */
__global__ void convert_rgb_to_yuv_kernel(cudaSurfaceObject_t src, uint8_t *dstY, uint8_t *dstU,
                                          uint8_t *dstV, unsigned int dstPitch, unsigned int width, unsigned int height)
{
    // Top-left pixel of this thread's 2x2 block. Unsigned throughout so that the bounds
    // check and the offset arithmetic do not mix signed and unsigned operands.
    const unsigned int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2u;
    const unsigned int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2u;

    if (x + 1u >= width || y + 1u >= height)
        return;

    unsigned int uSum = 0u;
    unsigned int vSum = 0u;

#pragma unroll
    for (unsigned int dy = 0u; dy < 2u; ++dy)
    {
#pragma unroll
        for (unsigned int dx = 0u; dx < 2u; ++dx)
        {
            const unsigned int px_x = x + dx;
            const unsigned int px_y = y + dy;

            // surf2Dread takes the x coordinate in bytes, not in elements.
            uchar4 px;
            surf2Dread<uchar4>(&px, src, static_cast<int>(px_x * sizeof(uchar4)), static_cast<int>(px_y));

            uint8_t y_val, u_val, v_val;
            rgb_to_yuv(px.z, px.y, px.x, y_val, u_val, v_val);

            // size_t offsets keep row * pitch from wrapping in 32-bit arithmetic.
            dstY[static_cast<size_t>(px_y) * dstPitch + px_x] = y_val;

            uSum += u_val;
            vSum += v_val;
        }
    }

    // One chroma sample per 2x2 block: rounded mean of the four per-pixel values.
    const size_t chromaOffset = static_cast<size_t>(y / 2u) * (dstPitch / 2u) + (x / 2u);
    dstU[chromaOffset] = static_cast<uint8_t>((uSum + 2u) >> 2);
    dstV[chromaOffset] = static_cast<uint8_t>((vSum + 2u) >> 2);
}
/**
 * Launches convert_rgb_to_yuv_kernel on the given stream to convert a surface into
 * separate Y, U and V planes.
 *
 * The launch uses 16x16 thread blocks and one thread per 2x2 pixel block, so the grid
 * covers (width / 2) x (height / 2) threads, rounded up to whole blocks. The call only
 * enqueues the kernel; it does not synchronize, and it does not inspect the launch
 * status — callers that need it should query cudaGetLastError() afterwards.
 *
 * If width or height is smaller than 2 there is no complete 2x2 block to convert and
 * the function returns without launching (a zero-sized grid would be an invalid launch
 * configuration).
 *
 * @param src            source surface object
 * @param pDevPtrDestY   destination luma plane
 * @param pDevPtrDestU   destination U plane
 * @param pDevPtrDestV   destination V plane
 * @param destPitch      pitch forwarded to the kernel as the luma row stride
 * @param width          image width in pixels
 * @param height         image height in pixels
 * @param stream         stream the kernel is enqueued on
 */
void convert_rgb_to_yuv(cudaSurfaceObject_t src, uint8_t *pDevPtrDestY, uint8_t *pDevPtrDestU,
                        uint8_t *pDevPtrDestV, unsigned int destPitch, unsigned int width, unsigned int height, cudaStream_t stream)
{
    // Number of 2x2 pixel blocks in each direction (one thread each).
    const unsigned int blocksOfTwoX = width / 2;
    const unsigned int blocksOfTwoY = height / 2;

    // Nothing to convert; also avoids launching with a grid dimension of zero.
    if (blocksOfTwoX == 0 || blocksOfTwoY == 0)
        return;

    const dim3 threads(16, 16, 1);
    // Ceil-division so a partially filled last block is still launched.
    const dim3 blocks((blocksOfTwoX + threads.x - 1) / threads.x,
                      (blocksOfTwoY + threads.y - 1) / threads.y,
                      1);

    convert_rgb_to_yuv_kernel<<<blocks, threads, 0, stream>>>(src,
                                                              pDevPtrDestY, pDevPtrDestU, pDevPtrDestV,
                                                              destPitch, width, height);
}
In one instance I changed the encoder format to YUV444 planar and changed the kernel accordingly, and
then I do get colors out. But the program stops after a couple of frames. That is a separate issue
which I don't want to tackle for now. To get a working prototype I want the BGRA to YUV420 conversion to work.
Thank you