Poor encoding quality and frame rates with VisionWorks on TX2

We want to use a 4K MIPI CSI2 color camera at 30fps with Jetson TX2, to perform some processing on the images and output an encoded stream at 1080p 30fps, with the highest encoding quality possible.

Using JetPack 3.2, we found performance problems that appear only when the encoder is used. We tried several programs, including one of our current products, which uses CUDA on Quadro GPUs. It works, but performance degrades severely when encoding.

We have been reviewing the forum posts about this issue and found related topics, one of them talking about decoding performance problems.

We attach sample code, written with VisionWorks, that reproduces the problem. This code performs no explicit computation, only frame capture and frame encoding.

main.cpp

#include "VWpipeline.h"

/**
 * Program entry point: builds the camera pipeline and, if that succeeded,
 * runs the capture/render loop.
 *
 * @param argc  Number of command-line arguments.
 * @param argv  Command-line arguments, forwarded to the pipeline.
 * @return 0 after a normal run, otherwise the error code reported by
 *         VW_pipeline::create_pipeline_camera().
 */
int main(int argc, char** argv)
{
    VW_pipeline pipeline(argc, argv);

    // create_pipeline_camera() returns 1 on success and an
    // nvxio::Application exit code on failure. Running the loop after a
    // failed setup would dereference a null frame source or renderer.
    // NOTE(review): assumes none of the failure codes equals 1 - confirm
    // against the nvxio::Application exit-code enum.
    const int status = pipeline.create_pipeline_camera();
    if (status != 1) {
        return status;
    }

    pipeline.run_pipeline_camera();

    return 0;
}

VWpipeline.h

#ifndef VWPIPELINE_H_
#define VWPIPELINE_H_

#include <string>
#include <memory>

#include <NVX/Application.hpp>
#include <OVX/FrameSourceOVX.hpp>
#include <OVX/RenderOVX.hpp>

/**
 * Camera capture and render pipeline built on VisionWorks / NVXIO.
 *
 * Typical use: construct with the program's command line, call
 * create_pipeline_camera() once, then run_pipeline_camera().
 */
class VW_pipeline {

public:
    /// Stores the command line; it is parsed later by create_pipeline_camera().
    VW_pipeline(int argc, char** argv);
    ~VW_pipeline();

    /// Creates the context, frame source and renderer.
    /// @return 1 on success, an nvxio::Application exit code on failure.
    int  create_pipeline_camera();

    /// Runs the fetch/render loop until the user quits, the source closes
    /// or the frame limit is reached.
    void run_pipeline_camera();

private:
    // Requested (and, after opening, actual) frame source parameters.
    ovxio::FrameSource::Parameters config;

    // Null until create_pipeline_camera() creates the context, so the
    // handle never holds an indeterminate value.
    vx_context context = nullptr;

    // Declaration order is deliberate: members are destroyed in reverse
    // order, so the renderer goes away before the frame source.
    std::unique_ptr<ovxio::FrameSource> frameSource;
    std::unique_ptr<ovxio::Render> renderer;

    // Command line as received by main(); not owned.
    int _argc = 0;
    char **_argv = nullptr;

};

#endif /* VWPIPELINE_H_ */

VWpipeline.cpp

#include "VWpipeline.h"

#include <string>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <memory>

#include <NVX/nvx.h>
#include <NVX/nvx_timer.hpp>

#include <NVX/SyncTimer.hpp>
#include <OVX/FrameSourceOVX.hpp>
#include <OVX/RenderOVX.hpp>
#include "OVX/UtilityOVX.hpp"

// Log callback registered with the OpenVX context: prints the status code
// and message of each log entry to stdout and flushes immediately so the
// line is visible right away. The context and reference arguments are
// not used.
void VX_CALLBACK log_callback(vx_context    /*context*/,
                              vx_reference  /*ref*/,
                              vx_status     status,
                              const vx_char string[])
{
    fprintf(stdout, "LOG: [ status = %d ] %s\n", status, string);
    fflush(stdout);
}

// State shared between the keyboard callback and the render loop.
struct EventData {
    bool alive = true;   // cleared to request that the loop terminates
    bool pause = false;  // toggled to suspend/resume frame fetching
};

static void keyboardEventCallback(void* context, vx_char key, vx_uint32 /*x*/, vx_uint32 /*y*/)
{
    EventData* eventData = static_cast<EventData*>(context);
    if (key == 27) {
        // escape
        eventData->alive = false;
    }
    else if (key == 32) {
        eventData->pause = !eventData->pause;
    }
}

// Stores the command line for later parsing in create_pipeline_camera().
// The context handle starts out null so the destructor can tell whether a
// context was ever created.
VW_pipeline::VW_pipeline(int argc, char** argv)
    : context(nullptr), _argc(argc), _argv(argv)
{
}

// Releases the pipeline objects. The frame source and renderer were
// created against the context, so they are dropped first; the context is
// released last. Previously the context was never released.
VW_pipeline::~VW_pipeline() {
    renderer.reset();
    frameSource.reset();

    if (context) {
        vxReleaseContext(&context);
    }
}

int VW_pipeline::create_pipeline_camera() {
    nvxio::Application &app = nvxio::Application::get();
    ovxio::printVersionInfo();

config.frameWidth = 1920;
    config.frameHeight = 1080;
    config.fps = 25;

    std::string resolution = "1920x1080";
    std::string input = "device:///nvcamera";

    std::cout << config.fps << std::endl;

    app.setDescription("This sample captures frames from NVIDIA GStreamer camera");
    app.addOption('r', "resolution", "Input frame resolution", nvxio::OptionHandler::oneOf(&resolution,
       { "2592x1944", "2592x1458", "1920x1080", "1280x720", "640x480" }));
    app.addOption('f', "fps", "Frames per second", nvxio::OptionHandler::unsignedInteger(&config.fps,
       nvxio::ranges::atLeast(10u) & nvxio::ranges::atMost(120u)));

    app.init(_argc, _argv);

    // create context
    context = vxCreateContext();

    // get logs
    vxRegisterLogCallback(context, log_callback, vx_false_e);
    vxAddLogEntry((vx_reference) context, VX_FAILURE, "Hello there!\n");

    // create frame source
    frameSource = ovxio::createDefaultFrameSource(context, input);
    if (!frameSource) {
        std::cout << "Error: cannot open source! nullptr" << std::endl;
        return nvxio::Application::APP_EXIT_CODE_NO_RESOURCE;
    }

    if (!frameSource->setConfiguration(config)) {
        std::cout << "Error: cannot setup configuration the framesource!" << std::endl;
        return nvxio::Application::APP_EXIT_CODE_INVALID_VALUE;
    }

    if (!frameSource->open()) {
        std::cout << "Error: cannot open source!" << std::endl;
        return nvxio::Application::APP_EXIT_CODE_NO_RESOURCE;
    }

    config = frameSource->getConfiguration();

    // create render object (video render which not work)
    /*
    renderer = ovxio::createVideoRender(context, "output.mp4", config.frameWidth, config.frameHeight, config.format);
    if (!renderer) {
        std::cout << "Error: Cannot open video render!" << std::endl;
        return nvxio::Application::APP_EXIT_CODE_NO_RENDER;
    }
    */

    // create render object (display render which work)
    renderer = ovxio::createDefaultRender(context, "Window", config.frameWidth, config.frameHeight);
    if (!renderer) {
        std::cout << "Error: Cannot open default render!" << std::endl;
        return nvxio::Application::APP_EXIT_CODE_NO_RENDER;
    }

    return 1;
}

void VW_pipeline::run_pipeline_camera() {
    nvxio::Application &app = nvxio::Application::get();

    EventData eventData;
    renderer->setOnKeyboardEventCallback(keyboardEventCallback, &eventData);

    vx_image frame = vxCreateImage(context, config.frameWidth,
                                   config.frameHeight, config.format);
    NVXIO_CHECK_REFERENCE(frame);

    ovxio::Render::TextBoxStyle style = {{255,255,255,255}, {0,0,0,127}, {10,10}};

    nvx::Timer totalTimer;
    totalTimer.tic();

    int i = 0;
    while (eventData.alive) {

        // stop when captured 250 frames
        if (i == 250) {
            eventData.alive = false;
        }

        ovxio::FrameSource::FrameStatus status = ovxio::FrameSource::OK;
        if (!eventData.pause) {
            status = frameSource->fetch(frame);
        }

        switch(status) {
        case ovxio::FrameSource::OK:
            {
                double total_ms = totalTimer.toc();
                totalTimer.tic();

                std::ostringstream txt;
                txt << std::fixed << std::setprecision(1);

                txt << "Camera mode: " << config.frameWidth << 'x' << config.frameHeight << ' ' << config.fps << " FPS" << std::endl;
                txt << "Algorithm: " << "No Processing" << std::endl;
                txt << "Display: " << total_ms  << " ms / " << 1000.0 / total_ms << " FPS" << std::endl;

                txt << std::setprecision(6);
                txt.unsetf(std::ios_base::floatfield);

                txt << "FRAME RATE IS NOT CONSTRAINED" << std::endl;
                txt << "FRAME NUM " << i << std::endl;

                std::cout << txt.str();

                txt << "Space - pause/resume" << std::endl;
                txt << "Esc - close the demo";

                renderer->putImage(frame);
                renderer->putTextViewport(txt.str(), style);

                if (!renderer->flush())
                    eventData.alive = false;
                i++;
            }
            break;
        case ovxio::FrameSource::TIMEOUT:
            {
                // Do nothing
            }
            break;
        case ovxio::FrameSource::CLOSED:
            eventData.alive = false;
            break;
        }

    }

    renderer->close();

    std::cout << "end " << std::endl;

    //
    // Release all objects
    //
    //vxReleaseImage(&frame);
}

Makefile

# SELECTING ARCHITECTURE

# not doing cross-compiling
# NOTE: CC holds the C++ compiler driver; it is used to compile and to link.
CC                  = g++-5
ARCH                = $(shell uname -m)

$(info Selected architecture: $(ARCH))
$(info )
OUT_DIR             = build/$(ARCH)
OUTPUT              = $(OUT_DIR)/vwprova

# ############################################################################

CUDA_PATH           = /usr/local/cuda/targets/$(ARCH)-linux
VW_PATH             = $(HOME)/Documents/VisionWorks-1.6-Samples

# ############################################################################
# flags and include directories

CXXFLAGS            += -std=gnu++11 -g

EXTERNAL_CFLAGS     =
EXTERNAL_CFLAGS     += $(shell pkg-config --cflags cudart-9.0)

NVXIO_CFLAGS        =
NVXIO_CFLAGS        += -I $(VW_PATH)/nvxio/include
NVXIO_CFLAGS        += -I $(VW_PATH)/nvxio/src/
NVXIO_CFLAGS        += -I $(VW_PATH)/nvxio/src/NVX/

INCLUDES            =
INCLUDES            += $(EXTERNAL_CFLAGS)
INCLUDES            += $(NVXIO_CFLAGS)
INCLUDES            += -I $(VW_PATH)/3rdparty/opengl
INCLUDES            += -I $(VW_PATH)/3rdparty/glfw3/include
INCLUDES            += -I $(VW_PATH)/3rdparty/freetype/include
INCLUDES            += -I src

# ############################################################################
# link directories

EXTERNAL_LIBS       =
EXTERNAL_LIBS       += $(shell pkg-config --libs cudart-9.0)
EXTERNAL_LIBS       += $(shell pkg-config --libs visionworks)

NVXIO_LIBS          =
NVXIO_LIBS          += $(VW_PATH)/libs/$(ARCH)/linux/release/libovx.a
NVXIO_LIBS          += $(VW_PATH)/libs/$(ARCH)/linux/release/libnvx.a

LIBRARIES           =
LIBRARIES           += -L/usr/lib
LIBRARIES           += $(NVXIO_LIBS)
LIBRARIES           += $(VW_PATH)/3rdparty/freetype/libs/libfreetype.a
LIBRARIES           += $(VW_PATH)/3rdparty/glfw3/libs/libglfw3.a
ifeq ($(ARCH),aarch64)
LIBRARIES           += /usr/lib/aarch64-linux-gnu/tegra-egl/libGLESv2.so.2
LIBRARIES           += -L/usr/lib/aarch64-linux-gnu/tegra-egl -lEGL
endif
LIBRARIES           += $(shell pkg-config --libs gl)
LIBRARIES           += $(shell pkg-config --libs xrandr xi xxf86vm x11)
LIBRARIES           += $(shell pkg-config --libs gstreamer-base-1.0 gstreamer-pbutils-1.0 gstreamer-app-1.0)
ifeq ($(ARCH),aarch64)
LIBRARIES           += /usr/lib/aarch64-linux-gnu/tegra/libcuda.so
endif
LIBRARIES           += $(EXTERNAL_LIBS)
# libdl goes after the objects and static archives that need it; placed
# before them, a linker running with --as-needed drops it.
LIBRARIES           += -ldl

# ############################################################################
# linker flags

# A literal comma cannot appear inside a make function argument.
comma               := ,

# pkg-config prints the CUDA library directories as "-L<dir>"; -rpath needs
# the bare directory, so strip the "-L" prefix and emit one -rpath per dir.
CUDA_LIB_DIRS       = $(patsubst -L%,%,$(shell pkg-config --libs-only-L cudart-9.0))

LDFLAGS             =
LDFLAGS             += -Wl,--allow-shlib-undefined -pthread
LDFLAGS             += $(foreach d,$(CUDA_LIB_DIRS),-Wl$(comma)-rpath=$(d))

# ############################################################################
# Find sources and create a list of object files

CXX_SOURCES         = $(shell find src -name '*.cpp')
CXX_OBJECTS         = $(CXX_SOURCES:%.cpp=$(OUT_DIR)/%.o)

# No CUDA objects are built by this Makefile; the variable is kept as a
# hook and defaults to empty.
CUDA_OBJECTS        ?=

$(info sources $(CXX_SOURCES))

##############################################################################
# MAKE THE MAGIC

.PHONY: all
all: directories $(OUTPUT)

$(OUTPUT): $(CXX_OBJECTS) $(CUDA_OBJECTS)
	$(CC) $(LDFLAGS) $^ -o $@ $(LIBRARIES)

$(CXX_OBJECTS): $(OUT_DIR)/%.o: %.cpp
	mkdir -p $(@D)
	$(CC) $(INCLUDES) $(CXXFLAGS) -c $< -o $@

##############################################################################
# MISC: Create directories and clean

.PHONY: directories
directories:
	$(info Creating build directory and copying dependencies to it)
	mkdir -p $(OUT_DIR)

.PHONY: clean
clean:
	rm -rf $(OUT_DIR)/*

The frame rates we get with a 1080p source and output are similar to those reported in this post: https://devtalk.nvidia.com/default/topic/1027351/?comment=5225416; about 20 fps. Our goal is to achieve at least 30 fps with 4K input and analysis (on one of every two frames) and 1080p encoded output.

Hi,

Please try the MMAPI samples to check whether that pipeline can meet your requirements.
Thanks.