Getting scrambled pixels on NVENC output

joaopedro.socas · June 15, 2023, 3:44pm

Good day to all!
I’m having some problems getting the nVenc encoder to work. I’m mostly using the C++ classes provided on the SDK (NvEncoder and NvEncoderCuda), with just some methods I added to NvEncoderCuda so I could make most of the validations suggested on the programming guide.

I simplified the application, so I could just test the encode part. I’m rendering a simple OpenGL scene, and sending the frames to the encoder through a registered pixelbuffer as a cuda resource. Then saving the output on a file, the same way is made on the examples.

I wonder if it is just some dumb mistake I made, or if the lack of correct rate control configuration (I was trying to get a reasonable output before fixing it)

If someone is willing to help, the important parts of the code are below. Thanks in advance!

This is what I’m rendering (screen captured) :

And this is the NVENC output:

The OpenGL code:

    // (...)<setting other OpenGL things for the render> //

    unsigned int frame_buffer;
    glGenFramebuffers(1, &frame_buffer);
    glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer);

    unsigned int texture_color_buffer;
    glGenTextures(1, &texture_color_buffer);
    glBindTexture(GL_TEXTURE_2D, texture_color_buffer);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, SCR_WIDTH, SCR_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_color_buffer, 0);

    unsigned int render_buffer_depth;
    glGenRenderbuffers(1, &render_buffer_depth);
    glBindRenderbuffer(GL_RENDERBUFFER, render_buffer_depth);
    glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, SCR_WIDTH, SCR_HEIGHT);
    glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, render_buffer_depth);

    unsigned int pixel_buffer;
    glGenBuffers(1, &pixel_buffer);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, pixel_buffer);
    glBufferData(GL_PIXEL_PACK_BUFFER, SCR_WIDTH * SCR_HEIGHT * 4, NULL, GL_DYNAMIC_COPY);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    glEnable(GL_DEPTH_TEST);
    glEnable(GL_CULL_FACE);

    // the call to set and create de encoder
    EncodeNVENC encoder(0, pixel_buffer, SCR_WIDTH, SCR_HEIGHT, "encoded.h264");

    while (!glfwWindowShouldClose(window))
    {
        glEnable(GL_DEPTH_TEST);
        glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer);
        float currentFrame = glfwGetTime();
        deltaTime = currentFrame - lastFrame;
        lastFrame = currentFrame;

        processInput(window);
        glClearColor(0.f, 0.f, 0.f, 1.f);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        // (...)<setting some uniforms, camera movement and rendering the model> //

        glBindFramebuffer(GL_READ_FRAMEBUFFER, frame_buffer);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, pixel_buffer);
        glReadPixels(0, 0, SCR_WIDTH, SCR_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        encoder.Encode();
        
        // (...)<renders the output to a quad on the screen> //
     }

     encoder.CleanupEncoder();

the class I made to encode:

// .h part
class EncodeNVENC
{
public:
	EncodeNVENC(unsigned int cuda_device_id, unsigned int gl_pixel_buffer, unsigned int width, unsigned int height, const char* output_file_path);
	
	void Encode();
	void CleanupEncoder();
private:
	unsigned int cuda_device_id;
	CUcontext cuda_context;
	CUdevice cuda_device;
	NvEncoderCuda* cuda_encoder;

	struct cudaGraphicsResource* cuda_pixel_buffer;
	unsigned int gl_pixel_buffer;
	
	std::ofstream output_file;

	unsigned int width, height;
};

// .cpp part
EncodeNVENC::EncodeNVENC(unsigned int cuda_device_id, unsigned int gl_pixel_buffer, unsigned int width, unsigned int height, const char* output_file_path) :
    cuda_device_id(cuda_device_id), gl_pixel_buffer(gl_pixel_buffer), width(width), height(height)
{
    // For now, hardcoded GUIDS
    GUID encoder_GUID = NV_ENC_CODEC_H264_GUID;
    GUID preset_GUID = NV_ENC_PRESET_P7_GUID;
    NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_ABGR;
    NV_ENC_TUNING_INFO tuning_info = NV_ENC_TUNING_INFO_HIGH_QUALITY;

    // --------------------------------------------- CUDA AND GUIDs VALIDATION START --------------------------------------------- //

    if (cuInit(0) < 0)
    {
        std::cout << "CUDA driver initializing error!" << std::endl;
        return;
    }

    int n_devices = 0;
    if (cuDeviceGetCount(&n_devices) < 0)
    {
        std::cout << "Could not assert the number of CUDA devices!" << std::endl;
        return;
    }
   
    if (cuda_device_id < 0 || cuda_device_id >= n_devices)
    {
        std::cout << "Requested device ID not found. The device ID must be between 0 and " << n_devices - 1 << std::endl;
        return;
    }

    if (cuDeviceGet(&cuda_device, cuda_device_id) < 0)
    {
        std::cout << "Error getting the device with following ID: " << cuda_device_id << std::endl;
        return;
    }

    if (cuCtxCreate(&cuda_context, 0, cuda_device) < 0)
    {
        std::cout << "Error on CUDA context creation!" << std::endl;
        return;
    }

    
    cuda_encoder = new NvEncoderCuda(cuda_context, width, height, buffer_format);

    unsigned int encode_GUID_count = cuda_encoder->GetEncodeGUIDCount();
    if (encode_GUID_count < 1)
    {
        std::cout << "No compatible encode GUIDs with this device" << std::endl;
        return;
    }

    std::vector<GUID> encode_GUID_list;
    encode_GUID_list.resize(encode_GUID_count);
    cuda_encoder->GetEncodeList(encode_GUID_list.data(), encode_GUID_count, &encode_GUID_count);
    if (std::find(encode_GUID_list.begin(), encode_GUID_list.end(), encoder_GUID) == encode_GUID_list.end())
    {
        std::cout << "Selected encoder GUID not compatible with this device" << std::endl;
        return;
    }

    unsigned int encode_preset_GUID_count = cuda_encoder->GetEncodePresetCount(encoder_GUID);
    if (encode_preset_GUID_count < 1)
    {
        std::cout << "No presets were found for this encode GUID!" << std::endl;
        return;
    }

    std::vector<GUID> encode_preset_GUID_list;
    encode_preset_GUID_list.resize(encode_preset_GUID_count);
    cuda_encoder->GetEncodePresetList(encoder_GUID, encode_preset_GUID_list.data(), encode_preset_GUID_count, &encode_preset_GUID_count);
    if (std::find(encode_preset_GUID_list.begin(), encode_preset_GUID_list.end(), preset_GUID) == encode_preset_GUID_list.end())
    {
        std::cout << "Selected preset not found!" << std::endl;
        return;
    }

    unsigned int input_format_count = cuda_encoder->GetInputFormatCount(encoder_GUID);
    if (input_format_count < 1)
    {
        std::cout << "Error getting compatible input formats!" << std::endl;
        return;
    }

    std::vector<NV_ENC_BUFFER_FORMAT> input_format_list;
    input_format_list.resize(encode_preset_GUID_count);
    cuda_encoder->GetInputFormats(encoder_GUID, input_format_list.data(), input_format_count, &input_format_count);
    if (std::find(input_format_list.begin(), input_format_list.end(), buffer_format) == input_format_list.end())
    {
        std::cout << "Selected input format not found!" << std::endl;
        return;
    }

    // --------------------------------------------- CUDA AND GUIDs VALIDATION END --------------------------------------------- //

    // Encoder configuration
    NV_ENC_INITIALIZE_PARAMS initialize_params = { NV_ENC_INITIALIZE_PARAMS_VER };
    NV_ENC_CONFIG encode_config = { NV_ENC_CONFIG_VER };
    NvEncoderInitParam encode_options;

    initialize_params.encodeConfig = &encode_config;
    encode_options.SetInitParams(&initialize_params, buffer_format);
    cuda_encoder->CreateDefaultEncoderParams(&initialize_params, encoder_GUID, preset_GUID, tuning_info);
    cuda_encoder->CreateEncoder(&initialize_params);

    // OpenGL pixel buffer registration as cuda resource
    cudaError_t result = cudaGraphicsGLRegisterBuffer(&cuda_pixel_buffer, gl_pixel_buffer, cudaGraphicsRegisterFlagsReadOnly);
    if (result != cudaError::cudaSuccess)
    {
        std::cout << "Failed to register OpenGL pixel buffer as a cuda resource!" << std::endl;
    }

    // File oppening
    output_file.open(output_file_path, std::ios::out, std::ios::binary);
}


void EncodeNVENC::Encode()
{
    char* mapped_cuda_pixel_buffer = nullptr;
    size_t num_bytes;

    //Mapping the resource
    cudaError_t result = cudaGraphicsMapResources(1, &cuda_pixel_buffer);
    if (result != cudaError::cudaSuccess)
    {
        std::cout << "Failed to map the cuda resource" << std::endl;
        return;
    }
    result = cudaGraphicsResourceGetMappedPointer((void**)&mapped_cuda_pixel_buffer, &num_bytes, cuda_pixel_buffer);
    if (result != cudaError::cudaSuccess)
    {
        std::cout << "Failed to get mapped cuda resource pointer" << std::endl;
        return;
    }

    // Transfer data to the input frame
    const NvEncInputFrame* input_frame = cuda_encoder->GetNextInputFrame();
    NvEncoderCuda::CopyToDeviceFrame(cuda_context, mapped_cuda_pixel_buffer, 0, (CUdeviceptr)input_frame->inputPtr,
        (int)input_frame->pitch,
        width,
        height,
        CU_MEMORYTYPE_DEVICE,
        NV_ENC_BUFFER_FORMAT_ABGR,
        input_frame->chromaOffsets,
        input_frame->numChromaPlanes);
    cudaGraphicsUnmapResources(1, &cuda_pixel_buffer);

    std::vector<std::vector<uint8_t>> output_buffer{};
    cuda_encoder->EncodeFrame(output_buffer);
    for (std::vector<uint8_t>& packet : output_buffer)
    {
        output_file.write(reinterpret_cast<char*>(packet.data()), packet.size());
    }
}

void EncodeNVENC::CleanupEncoder()
{
    std::vector<std::vector<uint8_t>> drawing_buffer{};
    cuda_encoder->EndEncode(drawing_buffer);
    for (std::vector<uint8_t>& packet : drawing_buffer)
    {
        output_file.write(reinterpret_cast<char*>(packet.data()), packet.size());
    }
    output_file.close();
    cuda_encoder->DestroyEncoder();
    delete cuda_encoder;
}

I have also tried a infinitude of custom initialize parameters, presets, encode configs and encode codec configurations, but aways ended with similar results.

MarkusHoHo · June 26, 2023, 3:42pm

Hi @joaopedro.socas and welcome to the NVIDIA developer forums.

I am not an expert in this area and sadly could not find anyone yet to look at your issue in detail.

But my first suggestion would be that you are facing a buffer mismatch in terms of color format, padding, general format configuration, buffer clearing or similar. Because the frames start off more or less correctly, they only go wrong after starting to write non-null content.

What you should do is to first figure out what part is causing the wrong encoding. Don’t use a full-fledged GL render but just send a simple quad to the encoder. Without animation or anything. Just verify that you have the API and buffer formats all figured out correctly. After that you ca go and do something more sophisticated.

As I said, I can only help on a “how I would approach this” level, I can’t debug your code for you.

Lastly, if you want a moderator’s attention, no need to immediately send PMs, just tag one of us with the @ sign :-)

Thanks!

joaopedro.socas · July 10, 2023, 5:57pm

Hi @MarkusHoHo!

After a lot of tries and errors, I still stuck.
I think there is no way to simplify further. I’m directly writing to the buffer with a single CUDA kernel and saving the output to a file:

const unsigned int SCR_WIDTH = 256;
const unsigned int SCR_HEIGHT = 256;

int main()
{
    const char* output_file_path = "encoded.h264";
    EncodeNVENC encoder(0, SCR_WIDTH, SCR_HEIGHT, output_file_path);
    for (int i = 0; i < 10; i++)
    {
        encoder.Encode();
    }
    encoder.CleanupEncoder();
    return 0;
}

The encoder creating is the same, but I removed everything openGL related. The encode call now only calls the .cu function with the returned inputFrame:

void EncodeNVENC::Encode()
{
    const NvEncInputFrame* input_frame = cuda_encoder->GetNextInputFrame();

    generate_lines(width, height, (CUdeviceptr)input_frame->inputPtr); // Draws direct on the buffer through a single cuda kernel

    std::vector<std::vector<uint8_t>> output_buffer{};
    cuda_encoder->EncodeFrame(output_buffer);
    for (std::vector<uint8_t>& packet : output_buffer)
    {
        output_file.write(reinterpret_cast<char*>(packet.data()), packet.size());
    }
}

void EncodeNVENC::CleanupEncoder()
{
    std::vector<std::vector<uint8_t>> output_buffer{};
    cuda_encoder->EndEncode(output_buffer);
    for (std::vector<uint8_t>& packet : output_buffer)
    {
        output_file.write(reinterpret_cast<char*>(packet.data()), packet.size());
    }
    output_file.close();
    cuda_encoder->DestroyEncoder();
    delete cuda_encoder;
}

and this is the cuda part:

__global__ static void generate_lines_kernel(unsigned char* target, unsigned int width, unsigned int height)
{
    for (int i = 0; i < (width * height); i++)
    {
        target[i * 4] = 0;
        target[i * 4 + 1] = 0;
        target[i * 4 + 2] = 0;
        target[i * 4 + 3] = 255;
    }

    for (int j = height/4; j < (height/4)*3; j += 4)
    {
        for (int i = j * width + width/4; i < j * width + (width / 4) * 3; i++)
        {
            target[i * 4] = 255;
            target[i * 4 + 1] = 255;
            target[i * 4 + 2] = 255;
            target[i * 4 + 3] = 255;
        }
    }
}

void generate_lines(unsigned int width, unsigned int height,  CUdeviceptr target)
{
    generate_lines_kernel <<<1, 1>>> ((unsigned char*)target, width, height);
}

When decoding this with ffmepg to png, I get 10 black frames and this error output:

[h264 @ 000001f0484d8680] error while decoding MB 14 6, bytestream -34
[h264 @ 000001f0484d8680] concealing 195 DC, 195 AC, 195 MV errors in I frame
[h264 @ 000001f0484baa80] error while decoding MB 7 15, bytestream -5
[h264 @ 000001f0484baa80] concealing 58 DC, 58 AC, 58 MV errors in P frame

Instead, if I just fully paint the same area I’m drawing the lines, i get one less error code, but the output frames are right:

[h264 @ 0000024ac8936e80] error while decoding MB 7 15, bytestream -5
[h264 @ 0000024ac8936e80] concealing 58 DC, 58 AC, 58 MV errors in P frame

output_0001

If you find someone to help me, the data from the packets probed for the lines test are below:

[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=403
pos=0
flags=K__
data=
00000000: 0000 0001 6764 000d ac2c a504 021b 0110 …gd…,…
00000010: 0000 3e80 000e a608 4000 0000 0168 eb8f …>…@…h…
00000020: 2c00 0000 0165 b804 000e fffa 7f0f c57f ,…e…
00000030: e0bc 63d9 5ffe 5e7a 9b09 7c5e c1e6 0bc4 …c..^z…|^…
00000040: eb00 03db 0000 0302 7b81 379f 8b3d 1ad9 …{.7…=…
00000050: 7f07 71f9 c85e 9e0b bba6 5fb3 e419 788b …q…^……x.
00000060: 54cf 115c 5074 3977 5c8d 8432 74d8 84a5 T…\Pt9w..2t…
00000070: b14f 596e db16 c938 839b 7bd2 d881 0257 .OYn…8…{…W
00000080: 71b7 07e2 4677 e8b8 8fc0 84d4 0db4 c2c7 q…Fw…
00000090: b572 1a18 e852 4e2d 463f c267 1925 2488 .r…RN-F?.g.%$.
000000a0: 93d7 101b d55b c004 6d60 7a2a 58b1 ced1 …[…m`z*X…
000000b0: 9229 2659 73ff f939 28b4 1d9e a895 832b .)&Ys…9(…+
000000c0: 7820 94ce 365b 4669 fbd3 9a8f 46b9 0d0a x …6[Fi…F…
000000d0: 40b9 5176 ad51 a56d db68 664c b03c 3e5a @.Qv.Q.m.hfL.<>Z
000000e0: 7d44 1980 2245 4655 f25c 1798 97e2 e662 }D…"EFU..…b
000000f0: 34a7 a49b e8bc ea44 6c3a 7887 4ff3 6e8d 4…Dl:x.O.n.
00000100: 2878 69a1 5582 aa93 30bd a231 cf3e 8142 (xi.U…0…1.>.B
00000110: c57b dfc9 e870 b49e 4b1c 04a6 f9d9 9743 .{…p…K…C
00000120: 3c9a 340b 0e53 aa08 4933 212d e1d4 31c5 <.4…S…I3!-…1.
00000130: 8f02 6162 bdef e4f5 33d5 01a7 7f0d 0ab6 …ab…3…
00000140: 915a c57d c4c0 1687 8e0f f1d9 6315 e10d .Z.}…c…
00000150: 221e 3f07 40a1 62bd efe4 f6b5 134d 6c52 ".?.@.b…MlR
00000160: abbe 72fc 7a5d fc0c be4b ca2f 57cc 1fde …r.z]…K./W…
00000170: 80cd de04 8337 d082 e601 adcc e380 58ed …7…X.
00000180: a5db e78c 2d27 92c7 0129 d49c 4b0d a000 …-‘…)…K…
00000190: 007a 40 .z@
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=17
pos=403
flags=___
data=
00000000: 0000 0001 61e0 2119 0fff fe9e 1000 0d0a …a.!..
00000010: 98 .
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=14
pos=420
flags=___
data=
00000000: 0000 0001 61a8 1027 8877 ff00 019f …a…’.w…
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=13
pos=434
flags=___
data=
00000000: 0000 0001 01a8 1817 90ef 0001 9f …
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=14
pos=447
flags=___
data=
00000000: 0000 0001 01a8 1836 a43b ff00 019f …6.;…
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=18
pos=461
flags=___
data=
00000000: 0000 0001 61e0 620d 2910 47ff fea9 9600 …a.b.).G…
00000010: 015f ._
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=47
pos=479
flags=___
data=
00000000: 0000 0001 61a8 2064 2087 ffef 7763 2d7f …a. d …wc-.
00000010: e0c8 1285 c001 7b7b e803 bc00 000b aa0f …{{…
00000020: b4d0 a000 0003 0000 0300 0003 0011 50 …P
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=47
pos=526
flags=___
data=
00000000: 0000 0001 01a8 2856 a410 ffef 7763 2d7f …(V…wc-.
00000010: e0c8 1285 c001 7b7b e803 bc00 000b aa0f …{{…
00000020: b4d0 a000 0003 0000 0300 0003 0011 50 …P
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=46
pos=573
flags=___
data=
00000000: 0000 0001 01a8 2874 410f ef77 632d 7fe0 …(tA…wc-…
00000010: c812 85c0 017b 7be8 03bc 0000 0baa 0fb4 …{{…
00000020: d0a0 0000 0300 0003 0000 0300 1150 …P
[/PACKET]
[PACKET]
codec_type=video
stream_index=0
pts=N/A
pts_time=N/A
dts=N/A
dts_time=N/A
duration=40000
duration_time=0.033333
size=18
pos=619
flags=___
data=
00000000: 0000 0001 61e0 a24d 2910 4bff fea9 9600 …a…M).K…
00000010: 015f ._
[/PACKET]

Topic		Replies	Views
Weird output with Nvidia Encoder SDK Video Processing & Optical Flow	1	1509	November 30, 2022
nvEncEncodePicture stuck for more input Video Processing & Optical Flow cuda , nvenc	2	909	July 24, 2023
nvEncGetEncodePresetConfig: NV_ENC_ERR_INVALID_DEVICE Miscellaneous	2	3988	June 29, 2021
Video Codec SDK 9.1.23 crash with RC lookahead Video Processing & Optical Flow	6	1261	August 17, 2021
Error calling the NvEncCreateBitstreamBuffer() function in the NvEncode api Video Processing & Optical Flow	2	1244	October 30, 2019
nvEncUnlockInputBuffer() causes segfault Video Processing & Optical Flow	2	905	June 14, 2018
nvivafilter customer-lib implementation (code available) Jetson AGX Xavier	7	1773	October 18, 2021
[Linux] NVCuvid - Performarce CUDA Programming and Performance	13	4021	March 9, 2016
Fail to get frame in Tensor Metadata DeepStream SDK	24	2489	October 12, 2021
Session count limitation for NVENC (No Maxwell GPUs with 2+ NEVENC sessions?) GPU-Accelerated Libraries	25	33124	February 26, 2018

Getting scrambled pixels on NVENC output

Related topics