frames returned from nveglstreamsrc via EGL stream out of order

running environment: jetpack 3.1 on tx2

I push frames to the GStreamer element nveglstreamsrc via an EGL stream in FIFO mode. After setting the pipeline to the PLAYING state and connecting the CUDA producer to the EGL stream, I push 4 frame buffers into the FIFO. After that, the producer thread starts and pushes a new frame every 40 milliseconds.

When pushing a new frame, I first call cuEGLStreamProducerReturnFrame to get a returned frame from the EGL stream, copy data into that frame’s buffer, and then call cuEGLStreamProducerPresentFrame to push the frame back into the EGL stream FIFO. I print the returned frame’s address and found that the frames come back out of order: I push 4 frames at the beginning, but only the first 2 frames are returned, over and over.

Here is the test log:

CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 4, latency 0.

NvEglStreamSrcInitializeEgl: Load library: libEGL.so
Connect EGL stream to cuda producer.
CUDA producer present frame: 0xa48940.
CUDA producer present frame: 0xa4a880.
CUDA producer present frame: 0xa4bc00.
CUDA producer present frame: 0xa4cf80.
Present a new frame 1.
CUDA producer return frame: 0xa48940.
Present a new frame 2.
CUDA producer return frame: 0xa4a880.
Present a new frame 3.
CUDA producer return frame: 0xa48940.
Present a new frame 4.
CUDA producer return frame: 0xa4a880.
Present a new frame 5.
CUDA producer return frame: 0xa48940.
Present a new frame 6.
CUDA producer return frame: 0xa4a880.
Present a new frame 7.
CUDA producer return frame: 0xa48940.
Present a new frame 8.
CUDA producer return frame: 0xa4a880.
Present a new frame 9.
CUDA producer return frame: 0xa48940.
Present a new frame 10.
CUDA producer return frame: 0xa4a880.
Terminate EGL display.

The test code is below:
main.cpp

#include <thread>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <gst/gst.h>
#include <cuda_runtime.h>

#include "eglstreamproducer.h"

static const int FrameWidth = 800;
static const int FrameHeight = 600;

static EGLStreamProducer *eglStreamProducer = nullptr;

/**
 * Producer thread entry point: presents 50 frames, one every 40 ms.
 *
 * Allocates a single device staging buffer of FrameWidth * FrameHeight * 3 / 2
 * bytes and passes it to EGLStreamProducer::presentFrame() on every iteration.
 * Nothing in this function writes to the buffer, so its contents are whatever
 * the allocation happens to hold.
 *
 * A failed presentFrame() call is reported and the loop keeps going.
 */
void producerThreadFunc()
{
    // The message below indicates this call is meant to make sure a CUDA
    // context exists on this thread before any driver-API call is issued.
    if (cudaFree(nullptr) != cudaSuccess) {
        printf("Failed to initialize CUDA context.\n");
        return;
    }

    CUdeviceptr buffer = 0;
    CUresult ret = cuMemAlloc(&buffer, FrameWidth * FrameHeight * 3 / 2);
    if (ret != CUDA_SUCCESS) {
        // The original format string ended in "\n." which printed a stray
        // period at the start of the next log line.
        g_print("cuMemAlloc failed: %d.\n", ret);
        return;
    }

    for (int cnt = 1; cnt <= 50; cnt++) {
        std::this_thread::sleep_for(std::chrono::milliseconds(40));

        g_print("Present a new frame %d.\n", cnt);
        // call cuEGLStreamProducerReturnFrame to get the returned frame from EGL stream,
        // and then call cuEGLStreamProducerPresentFrame to push the frame back to the EGL stream FIFO.
        if (eglStreamProducer->presentFrame(buffer) != 0) {
            // presentFrame() already printed the CUDA error code; record which
            // iteration failed instead of dropping the result silently.
            g_print("Present frame %d failed.\n", cnt);
        }
    }

    cuMemFree(buffer);
}

/**
 * Test driver: builds the pipeline nveglstreamsrc -> capsfilter -> fakesink,
 * feeds it from a CUDA EGL stream producer, and runs the producer thread to
 * completion.
 *
 * Order of operations as written: create the elements and the
 * EGLStreamProducer, hand the producer's display/stream handles to the source
 * element, set the pipeline to PLAYING, connect the CUDA producer, pre-fill
 * the stream with 4 frames, then start the producer thread.
 *
 * NOTE(review): every early `return -1` below exits without releasing the
 * pipeline, the already-created elements, or eglStreamProducer.
 *
 * @return 0 on success, -1 if any setup step fails.
 */
int main(int argc, char *argv[])
{
    // argc/argv are not forwarded to GStreamer.
    gst_init(nullptr, nullptr);

    GstElement *pipeline = gst_pipeline_new("play");
    if (pipeline == nullptr) {
        g_print("Create pipeline failed.\n");
        return -1;
    }

    GstElement *source = gst_element_factory_make("nveglstreamsrc", nullptr);
    if (source == nullptr) {
        g_print("Create eglstream source failed.\n");
        return -1;
    }

    // FIFO length 4, latency 0. The constructor only prints on failure, so the
    // handles passed to the source below are not validated here.
    eglStreamProducer = new EGLStreamProducer(4, 0, FrameWidth, FrameHeight);
    g_object_set(source, "display", eglStreamProducer->getEGLDisplay(), nullptr);
    g_object_set(source, "eglstream", eglStreamProducer->getEGLStream(), nullptr);

    GstElement *capFilter = gst_element_factory_make("capsfilter", nullptr);
    if (capFilter == nullptr) {
        g_print("Create capsfilter failed.\n");
        return -1;
    }

    // Caps requested downstream of the source: NV12, 800x600, 25/1 fps.
    GstCaps *caps = gst_caps_new_simple("video/x-raw", "format", G_TYPE_STRING, "NV12",
                                        "width", G_TYPE_INT, FrameWidth,
                                        "height", G_TYPE_INT, FrameHeight,
                                        "framerate", GST_TYPE_FRACTION, 25, 1, NULL);

    // Attach the memory:NVMM caps feature to the first (only) caps structure.
    GstCapsFeatures *feature = gst_caps_features_new("memory:NVMM", NULL);
    gst_caps_set_features(caps, 0, feature);

    /* Set capture caps on capture filter */
    g_object_set(capFilter, "caps", caps, NULL);
    gst_caps_unref(caps);

    // The sink is a fakesink, although the messages below call it an
    // "overlay sink".
    GstElement *sink = gst_element_factory_make("fakesink", nullptr);
    if (sink == nullptr) {
        g_print("Create overlay sink failed.\n");
        return -1;
    }

    gst_bin_add_many(GST_BIN(pipeline), source, capFilter, sink, nullptr);
    if (!gst_element_link_many(source, capFilter, sink, nullptr)) {
        g_print("Link elememt eglstream source <-> overlay sink failed.\n");
        return -1;
    }

    // The pipeline is set to PLAYING before the CUDA producer connects to the
    // stream.
    GstStateChangeReturn ret = gst_element_set_state(pipeline, GST_STATE_PLAYING);
    if (ret == GST_STATE_CHANGE_FAILURE) {
        g_print("Change pipeline state to %s failed.\n", gst_element_state_get_name(GST_STATE_PLAYING));
        return -1;
    }

    if (!eglStreamProducer->connectEGLProducer()) {
        g_print("Connect EGL stream cuda producer failed.\n");
        return -1;
    }

    // Firstly, call cuEGLStreamProducerPresentFrame to push 4 frame buffers to the EGL stream FIFO.
    // NOTE(review): the return value (-1 on failure) is ignored.
    eglStreamProducer->presentFrameBuffers(4);

    // start the cuda producer
    std::thread t = std::thread(producerThreadFunc);

    // Block until the producer thread has finished, then tear down: stop the
    // pipeline first, release it, and destroy the producer last.
    t.join();
    gst_element_set_state(pipeline, GST_STATE_NULL);
    gst_object_unref(pipeline);
    delete eglStreamProducer;
    return 0;
}

eglstreamproducer.h

#ifndef EGLSTREAMPRODUCER_H
#define EGLSTREAMPRODUCER_H

#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <cudaEGL.h>

/**
 * CUDA producer endpoint of an EGL stream.
 *
 * The constructor obtains the EGL display and creates the stream (FIFO mode
 * when fifoLength > 0, mailbox mode otherwise). connectEGLProducer() attaches
 * CUDA as the stream's producer, presentFrameBuffers() allocates frames and
 * presents them for the first time, and presentFrame() recycles a returned
 * frame with new data.
 *
 * The destructor disconnects the CUDA producer and destroys the stream, so the
 * class is non-copyable: a copy would release the same handles twice.
 */
class EGLStreamProducer
{
public:
    /**
     * @param fifoLength requested FIFO depth; a value <= 0 selects mailbox mode
     * @param latency    value applied to both the consumer-latency and the
     *                   consumer-acquire-timeout stream attributes
     * @param width      frame width in pixels
     * @param height     frame height in pixels
     */
    EGLStreamProducer(int fifoLength, int latency, int width, int height);
    ~EGLStreamProducer();

    EGLStreamProducer(const EGLStreamProducer &) = delete;
    EGLStreamProducer &operator=(const EGLStreamProducer &) = delete;

    /** @return the EGL display, or EGL_NO_DISPLAY if initialisation failed. */
    EGLDisplay getEGLDisplay() const {
        return display;
    }

    /** @return the EGL stream, or EGL_NO_STREAM_KHR if creation failed. */
    EGLStreamKHR getEGLStream() const {
        return stream;
    }

    /** Connects CUDA as the producer of the stream. @return true on success. */
    bool connectEGLProducer();

    /** Allocates and presents bufferNum new frames. @return 0 on success, -1 on error. */
    int presentFrameBuffers(int bufferNum);

    /**
     * Takes one returned frame, copies the three planes from `data` into it
     * and presents it again. @return 0 on success, -1 on error.
     */
    int presentFrame(CUdeviceptr data);

private:
    bool initEGLDisplay();
    bool initEGLStream();
    void finalizeEGLStream();
    void finalizeEGLCudaProducer();

    // Default member initialisers keep the destructor path well-defined even
    // if a constructor returns before assigning every field.
    EGLDisplay display = EGL_NO_DISPLAY;
    EGLStreamKHR stream = EGL_NO_STREAM_KHR;
    int fifoLength = 0;
    bool fifoMode = false;
    int latency = 0;
    int width = 0;
    int height = 0;

    // Tested for null in finalizeEGLCudaProducer(), so it must start as null.
    CUeglStreamConnection cudaConnection = nullptr;
};


#endif // EGLSTREAMPRODUCER_H

eglstreamproducer.cpp

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>

#include "eglstreamproducer.h"
#include "EGLAPIAccessors.hpp"

/**
 * Stores the stream parameters, then obtains the EGL display and creates the
 * EGL stream.
 *
 * Failures are only reported on stdout; the object is left with
 * display == EGL_NO_DISPLAY and/or stream == EGL_NO_STREAM_KHR in that case.
 *
 * @param fifoLength requested FIFO depth; > 0 selects FIFO mode, otherwise mailbox mode
 * @param latency    value later applied to the stream's latency/timeout attributes
 * @param width      frame width in pixels
 * @param height     frame height in pixels
 */
EGLStreamProducer::EGLStreamProducer(int fifoLength, int latency, int width, int height)
{
    display = EGL_NO_DISPLAY;
    stream = EGL_NO_STREAM_KHR;
    // The destructor calls finalizeEGLCudaProducer(), which tests this handle.
    // It was previously left uninitialised, so destroying an object that was
    // never connected read an indeterminate value.
    cudaConnection = nullptr;

    this->fifoLength = fifoLength;
    fifoMode = (fifoLength > 0);
    this->latency = latency;
    this->width = width;
    this->height = height;

    printf("CUDA producer initializing EGL display.\n");
    if (!initEGLDisplay()) {
        printf("Cannot initialize EGL display.\n");
        return;
    }

    printf("CUDA producer initializing EGL stream.\n");
    if (!initEGLStream()) {
        printf("Cannot initialize EGL Stream.\n");
        return;
    }
}

/**
 * Releases the resources in reverse order of acquisition: the CUDA producer
 * connection is finalized first, then the EGL stream is destroyed.
 */
EGLStreamProducer::~EGLStreamProducer()
{
    finalizeEGLCudaProducer();
    finalizeEGLStream();
}

bool EGLStreamProducer::connectEGLProducer()
{
    printf("Connect EGL stream to cuda producer.\n");

    if (cudaFree(nullptr) != cudaSuccess) {
        printf("Failed to initialize CUDA context.\n");
        return false;
    }

    CUresult ret = cuEGLStreamProducerConnect(&cudaConnection, stream, width, height);
    if (ret != CUDA_SUCCESS) {
        printf("Connect CUDA producer ERROR %d.\n", ret);
        return false;
    }

    return true;
}

int EGLStreamProducer::presentFrameBuffers(int bufferNum)
{
    CUresult ret;

    if (cudaFree(nullptr) != cudaSuccess) {
        printf("Failed to initialize CUDA context.\n");
        return -1;
    }

    for (int i = 0; i < bufferNum; i++) {
        CUarray cudaArr[3] = {0};
        CUDA_ARRAY3D_DESCRIPTOR desc = {0};
        desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
        desc.Depth = 1;
        desc.NumChannels = 1;
        desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
        for (int i = 0; i < 3; i++) {
            if (i == 0) {
                desc.Width = width;
                desc.Height = height;
            } else {
                desc.Width = width / 2;
                desc.Height = height / 2;
            }

            ret = cuArray3DCreate(&cudaArr[i], &desc);
            if (ret != CUDA_SUCCESS) {
                printf("CUDA create 3D array failed: %d.\n", ret);
                return -1;
            }
        }

        CUeglFrame eglFrame;
        eglFrame.planeCount = 3;
        eglFrame.numChannels = 1;
        eglFrame.width = width;
        eglFrame.height = height;
        eglFrame.depth = 1;
        eglFrame.pitch = 0;
        eglFrame.cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
        eglFrame.eglColorFormat = CU_EGL_COLOR_FORMAT_YUV420_PLANAR;
        eglFrame.frameType = CU_EGL_FRAME_TYPE_ARRAY;
        eglFrame.frame.pArray[0] = cudaArr[0];
        eglFrame.frame.pArray[1] = cudaArr[1];
        eglFrame.frame.pArray[2] = cudaArr[2];

        printf("CUDA producer present frame: %p.\n", eglFrame.frame.pArray[0]);

        CUresult ret = cuEGLStreamProducerPresentFrame(&cudaConnection, eglFrame, nullptr);
        if (ret != CUDA_SUCCESS) {
            printf("CUDA producer present frame failed: %d.\n", ret);
            return -1;
        }
    }

    return 0;
}

/**
 * Recycles one frame: takes a frame returned by the stream, fills its three
 * planes from `data`, and presents it again.
 *
 * `data` is read as three tightly packed 8-bit planes laid out back to back:
 * a width x height plane at offset 0, then two (width/2) x (height/2) planes
 * at offsets width*height and width*height + width*height/4.
 *
 * NOTE(review): if a plane copy fails, the frame obtained from the stream is
 * not presented again, so it drops out of circulation.
 *
 * @param data device pointer to the packed source image
 * @return 0 on success, -1 on the first CUDA error (printed to stdout)
 */
int EGLStreamProducer::presentFrame(CUdeviceptr data)
{
    if (cudaFree(nullptr) != cudaSuccess) {
        printf("Failed to initialize CUDA context.\n");
        return -1;
    }

    CUeglFrame eglFrame;
    CUresult ret = cuEGLStreamProducerReturnFrame(&cudaConnection, &eglFrame, nullptr);
    if (ret != CUDA_SUCCESS) {
        printf("CUDA producer return frame failed: %d.\n", ret);
        return -1;
    }

    printf("CUDA producer return frame: %p.\n", eglFrame.frame.pArray[0]);

    const size_t lumaBytes = static_cast<size_t>(width * height);
    const size_t chromaBytes = static_cast<size_t>(width * height / 4);
    const size_t offsets[3] = { 0, lumaBytes, lumaBytes + chromaBytes };
    const size_t copyWidth[3] = { static_cast<size_t>(width),
                                  static_cast<size_t>(width / 2),
                                  static_cast<size_t>(width / 2) };
    const size_t copyHeight[3] = { static_cast<size_t>(height),
                                   static_cast<size_t>(height / 2),
                                   static_cast<size_t>(height / 2) };

    for (int plane = 0; plane < 3; plane++) {
        CUDA_MEMCPY3D cpdesc;
        memset(&cpdesc, 0, sizeof(cpdesc));
        cpdesc.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        // CUdeviceptr is an integer handle, so offset it with plain arithmetic
        // instead of casting through a host pointer type.
        cpdesc.srcDevice = data + offsets[plane];
        // State the source layout explicitly: rows are tightly packed and the
        // plane is copyHeight rows tall.
        cpdesc.srcPitch = copyWidth[plane];
        cpdesc.srcHeight = copyHeight[plane];
        cpdesc.dstMemoryType = CU_MEMORYTYPE_ARRAY;
        cpdesc.dstArray = eglFrame.frame.pArray[plane];
        cpdesc.WidthInBytes = copyWidth[plane];
        cpdesc.Height = copyHeight[plane];
        cpdesc.Depth = 1;

        ret = cuMemcpy3D(&cpdesc);
        if (ret != CUDA_SUCCESS) {
            printf("CUDA producer copy data to EGL frame failed: %d.\n", ret);
            return -1;
        }
    }

    ret = cuEGLStreamProducerPresentFrame(&cudaConnection, eglFrame, nullptr);
    if (ret != CUDA_SUCCESS) {
        printf("CUDA producer present frame failed: %d.\n", ret);
        return -1;
    }

    return 0;
}

/**
 * Fetches the EGL display from EGLDisplayAccessor and stores it in `display`.
 *
 * @return true if a display other than EGL_NO_DISPLAY was obtained.
 */
bool EGLStreamProducer::initEGLDisplay()
{
    display = EGLDisplayAccessor::getInstance();

    const bool obtained = (display != EGL_NO_DISPLAY);
    if (!obtained) {
        printf("Obtain EGL display failed.\n");
    }
    return obtained;
}

bool EGLStreamProducer::initEGLStream()
{
    const EGLint streamAttrMailboxMode[] = { EGL_NONE };
    const EGLint streamAttrFIFOMode[] = { EGL_STREAM_FIFO_LENGTH_KHR, fifoLength, EGL_NONE };

    if (!setupEGLExtensions()) {
        return false;
    }

    stream = eglCreateStreamKHR(display, fifoMode ? streamAttrFIFOMode : streamAttrMailboxMode);
    if (stream == EGL_NO_STREAM_KHR) {
        printf("Couldn't create stream.\n");
        return false;
    }

    if (!eglStreamAttribKHR(display, stream, EGL_CONSUMER_LATENCY_USEC_KHR, latency)) {
        printf("Producer: streamAttribKHR EGL_CONSUMER_LATENCY_USEC_KHR failed.\n");
    }
    if (!eglStreamAttribKHR(display, stream, EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR, latency)) {
        printf("Producer: streamAttribKHR EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR failed.\n");
    }

    // Get stream attributes
    if (!eglQueryStreamKHR(display, stream, EGL_STREAM_FIFO_LENGTH_KHR, &fifoLength)) {
        printf("Producer: eglQueryStreamKHR EGL_STREAM_FIFO_LENGTH_KHR failed.\n");
    }
    if (!eglQueryStreamKHR(display, stream, EGL_CONSUMER_LATENCY_USEC_KHR, &latency)) {
        printf("Producer: eglQueryStreamKHR EGL_CONSUMER_LATENCY_USEC_KHR failed.\n");
    }

    if (fifoMode != (fifoLength > 0)) {
        printf("EGL Stream consumer - Unable to set FIFO mode.\n");
        fifoMode = false;
    }
    if (fifoMode) {
        printf("EGL Stream consumer - Mode: FIFO, Length: %d, latency %d.\n", fifoLength, latency);
    } else {
        printf("EGL Stream consumer - Mode: Mailbox.\n");
    }

    return true;
}

/**
 * Destroys the EGL stream if one exists and resets the handle to
 * EGL_NO_STREAM_KHR. Does nothing when no stream was created.
 */
void EGLStreamProducer::finalizeEGLStream()
{
    if (stream == EGL_NO_STREAM_KHR) {
        return;
    }

    eglDestroyStreamKHR(display, stream);
    stream = EGL_NO_STREAM_KHR;
}

/**
 * Disconnects the CUDA producer from the stream when a connection handle is
 * set, then clears the handle. If the CUDA runtime call fails, the connection
 * is left untouched.
 */
void EGLStreamProducer::finalizeEGLCudaProducer()
{
    if (!cudaConnection) {
        return;
    }

    const bool contextReady = (cudaFree(nullptr) == cudaSuccess);
    if (!contextReady) {
        printf("Failed to initialize CUDA context.\n");
        return;
    }

    cuEGLStreamProducerDisconnect(&cudaConnection);
    cudaConnection = nullptr;
}

EGLAPIAccessors.hpp

#ifndef EGLAPIACCESSORS_HPP
#define EGLAPIACCESSORS_HPP

#include <EGL/egl.h>
#include <EGL/eglext.h>

#if !defined EGL_KHR_stream || !defined EGL_KHR_stream_fifo || !defined EGL_KHR_stream_consumer_gltexture
# error "EGL_KHR_stream extensions are not supported!"
#endif

/**
 * Holder for the process-wide EGL display handle.
 *
 * Construction and destruction are private; the only way to reach the display
 * is the static getInstance() accessor.
 */
class EGLDisplayAccessor
{
    // Members up to the access specifier are private by default.
    EGLDisplayAccessor();
    ~EGLDisplayAccessor();

    EGLDisplay eglDisplay;

public:
    /** @return the EGL display handle held by the shared instance. */
    static EGLDisplay getInstance();
};

// X-macro listing the EGL stream extension entry points used by this project.
// Each row is (function-pointer type, symbol name); the list is expanded with
// a caller-supplied macro T to generate declarations, definitions and tables.
#define EXTENSION_LIST_MY(T)                                     \
    T( PFNEGLCREATESTREAMKHRPROC,          eglCreateStreamKHR )  \
    T( PFNEGLDESTROYSTREAMKHRPROC,         eglDestroyStreamKHR ) \
    T( PFNEGLQUERYSTREAMKHRPROC,           eglQueryStreamKHR )   \
    T( PFNEGLSTREAMATTRIBKHRPROC,          eglStreamAttribKHR )


// Expands one row into an extern declaration of the function pointer.
#define EXTLST_EXTERN(tx, x) extern tx x;

// Declares every function pointer in the list.
EXTENSION_LIST_MY(EXTLST_EXTERN)

// Resolves the listed function pointers; returns false if any lookup fails.
bool setupEGLExtensions();


#endif // EGLAPIACCESSORS_HPP

EGLAPIAccessors.cpp

#include <stdio.h>

#include "EGLAPIAccessors.hpp"


/**
 * Returns the display handle of the function-local static accessor, which is
 * constructed on the first call.
 */
EGLDisplay EGLDisplayAccessor::getInstance()
{
    static EGLDisplayAccessor sharedAccessor;

    const EGLDisplay handle = sharedAccessor.eglDisplay;
    return handle;
}

/**
 * Obtains the default EGL display and initializes EGL on it.
 *
 * On any failure eglDisplay is left as EGL_NO_DISPLAY and a message is
 * printed; on success the EGL version is printed.
 */
EGLDisplayAccessor::EGLDisplayAccessor()
{
    // Obtain the EGL display
    eglDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY);
    if (eglDisplay == EGL_NO_DISPLAY) {
        printf("EGL failed to obtain display.\n");
        // Stop here: the original went on to call eglInitialize() and
        // eglTerminate() with EGL_NO_DISPLAY.
        return;
    }

    // Initialize EGL
    EGLint major = 0;
    EGLint minor = 0;
    if (!eglInitialize(eglDisplay, &major, &minor)) {
        printf("EGL failed to initialize.\n");
        eglTerminate(eglDisplay);
        eglDisplay = EGL_NO_DISPLAY;
        return;
    }

    printf("EGL API: %d.%d\n", major, minor);
}

/**
 * Terminates the EGL display if one was obtained, then logs the shutdown and
 * flushes stdout.
 */
EGLDisplayAccessor::~EGLDisplayAccessor()
{
    if (eglDisplay == EGL_NO_DISPLAY) {
        return;
    }

    eglTerminate(eglDisplay);
    eglDisplay = EGL_NO_DISPLAY;

    printf("Terminate EGL display.\n");
    fflush(stdout);
}


// Set once setupEGLExtensions() has resolved every entry point.
static bool initialized = false;

// Expands one list row into the definition of the function pointer, initially null.
#define EXTLST_IMPL_MY(tx, x) tx x = nullptr;
EXTENSION_LIST_MY(EXTLST_IMPL_MY)

// Generic function-pointer type used to store any entry point in the table below.
typedef void (* extlst_fnptr_t)(void);
// Expands one list row into a table entry: { address of the pointer variable, symbol name string }.
#define EXTLST_ENTRY_MY(tx, x) { ( extlst_fnptr_t *)&x, #x },

// Lookup table pairing each function-pointer variable with the name passed to
// eglGetProcAddress() in setupEGLExtensions().
static struct {
    extlst_fnptr_t * fnptr;
    char const * name;
} extensionList[] = { EXTENSION_LIST_MY(EXTLST_ENTRY_MY) };

/**
 * Resolves every entry point in extensionList through eglGetProcAddress().
 *
 * The lookup runs only until it has succeeded once; later calls return true
 * immediately. If a symbol cannot be resolved, its name is printed and the
 * function returns false without marking the table as initialized.
 *
 * @return true when all entry points are available.
 */
bool setupEGLExtensions()
{
    if (initialized) {
        return true;
    }

    for (auto &entry : extensionList) {
        *entry.fnptr = eglGetProcAddress(entry.name);
        if (!*entry.fnptr) {
            printf("Couldn't get address of %s()\n", entry.name);
            return false;
        }
    }

    initialized = true;
    return true;
}

Hi,
Please try the following flow:

present frame 1
present frame 2
wait for frame 1 to return
present frame 3
wait for frame 2 to return
present frame 4
wait for frame 3 to return
...

After calling cuEGLStreamProducerPresentFrame(i), always wait for cuEGLStreamProducerReturnFrame(i-1) being returned.

Hi DaneLLL,
I tried your proposed flow, but it still does not work properly with a FIFO size of 4 when I initially present 4 frame buffers; the returned frames are still out of order.

My test case is the same as your proposed flow, except that I wait for the returned frame at the next 40 ms time point.

My test case works fine on a TX1 flashed with JetPack 3.0; I am now running on a TX2 flashed with JetPack 3.1. I still cannot figure out the problem. Any help would be appreciated, thanks.

Hi SongjianSu, Can you also try flashing TX1 via Jetpack 3.1?

Hi SongjianSu, can you check if sm_62 is in Makefile? It is required for TX2.

Hi DaneLLL, my original test code is C++ source code. I have changed all filename suffixes to .cu, and all code is now compiled by nvcc with the -arch=sm_62 flag, but the result is still the same as before.

Here is the compile log:

/usr/local/cuda-8.0/bin/nvcc -D_DEBUG -D_GLIB_TEST_OVERFLOW_FALLBACK -std=c++11 -I"/usr/include/gstreamer-1.0" -I"/usr/include/glib-2.0" -I"/usr/lib/aarch64-linux-gnu/glib-2.0/include" -I"/usr/lib/aarch64-linux-gnu/gstreamer-1.0/include" -I"/usr/local/cuda-8.0/include" --machine 64 -arch=sm_62 -c -o eglstreamproducer_cuda.o ../nveglstreamsrctest/eglstreamproducer.cu
../nveglstreamsrctest/eglstreamproducer.cu: In member function 'void EGLStreamProducer::returnFrame()':
../nveglstreamsrctest/eglstreamproducer.cu:210:109: warning: format '%lld' expects argument of type 'long long int', but argument 3 has type 'int64_t {aka long int}' [-Wformat=]
/usr/local/cuda-8.0/bin/nvcc -D_DEBUG -D_GLIB_TEST_OVERFLOW_FALLBACK -std=c++11 -I"/usr/include/gstreamer-1.0" -I"/usr/include/glib-2.0" -I"/usr/lib/aarch64-linux-gnu/glib-2.0/include" -I"/usr/lib/aarch64-linux-gnu/gstreamer-1.0/include" -I"/usr/local/cuda-8.0/include" --machine 64 -arch=sm_62 -c -o main_cuda.o ../nveglstreamsrctest/main.cu
/usr/local/cuda-8.0/bin/nvcc -D_DEBUG -D_GLIB_TEST_OVERFLOW_FALLBACK -std=c++11 -I"/usr/include/gstreamer-1.0" -I"/usr/include/glib-2.0" -I"/usr/lib/aarch64-linux-gnu/glib-2.0/include" -I"/usr/lib/aarch64-linux-gnu/gstreamer-1.0/include" -I"/usr/local/cuda-8.0/include" --machine 64 -arch=sm_62 -c -o EGLAPIAccessors_cuda.o ../nveglstreamsrctest/EGLAPIAccessors.cu
g++  -o nveglstreamsrctest eglstreamproducer_cuda.o main_cuda.o EGLAPIAccessors_cuda.o   -lgstreamer-1.0 -lglib-2.0 -lgobject-2.0 -lEGL -L/usr/local/cuda-8.0/lib64 -lcuda -lcudart -lpthread

Hi SongjianSu,
Could you share your implementation so that we can reproduce it on r28.1/TX2? You have attached a few source files; could you also share the Makefile and the steps to reproduce the issue?

Hi DaneLLL, I have attached my test case project. After unpacking, run make, then run ./nveglstreamsrctest to execute the test.
nveglstreamsrctest.tar.gz (5 KB)

Hi SonhjianSu,
Please refer to attachment. It implements

present frame 1
present frame 2
wait for frame 1 to return
present frame 3
wait for frame 2 to return
present frame 4
wait for frame 3 to return
...

The log

nvidia@tegra-ubuntu:~/nveglstreamsrctest$ ./nveglstreamsrctest
CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 4, latency 0.

NvEglStreamSrcInitializeEgl: Load library: libEGL.so
Connect EGL stream to cuda producer.
CUDA producer present frame: 0x5c2600000.
CUDA producer present frame: 0x5c2800000.
Returned frame 0x5c2600000 used time 1 ms.
CUDA producer present frame: 0x5c2a00000.
Returned frame 0x5c2800000 used time 1 ms.
CUDA producer present frame: 0x5c2c00000.
Returned frame 0x5c2a00000 used time 1 ms.
============================
Present a new frame 1. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 1 ms.
Present a new frame 2. buf=0x5c2800000
Returned frame 0x5c2600000 used time 1 ms.
Present a new frame 3. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 10 ms.
Present a new frame 4. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 10 ms.
Present a new frame 5. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 10 ms.
Present a new frame 6. buf=0x5c2800000
Returned frame 0x5c2600000 used time 10 ms.
Present a new frame 7. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 10 ms.
Present a new frame 8. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 10 ms.
Present a new frame 9. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 10 ms.
Present a new frame 10. buf=0x5c2800000
Returned frame 0x5c2600000 used time 10 ms.
Present a new frame 11. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 10 ms.
Present a new frame 12. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 10 ms.
Present a new frame 13. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 10 ms.
Present a new frame 14. buf=0x5c2800000
Returned frame 0x5c2600000 used time 10 ms.
Present a new frame 15. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 16. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 10 ms.
Present a new frame 17. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 10 ms.
Present a new frame 18. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 19. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 10 ms.
Present a new frame 20. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 21. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 22. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 23. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 24. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 25. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 26. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 27. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 28. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 29. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 30. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 31. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 32. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 33. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 34. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 35. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 36. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 37. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 38. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 39. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 40. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 41. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 42. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 43. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 44. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 45. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 46. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Present a new frame 47. buf=0x5c2a00000
Returned frame 0x5c2800000 used time 9 ms.
Present a new frame 48. buf=0x5c2c00000
Returned frame 0x5c2a00000 used time 9 ms.
Present a new frame 49. buf=0x5c2600000
Returned frame 0x5c2c00000 used time 9 ms.
Present a new frame 50. buf=0x5c2800000
Returned frame 0x5c2600000 used time 9 ms.
Terminate EGL display.

nveglstreamsrctest_1.zip (7.02 KB)

Hi DaneLLL, is this by design or a bug? Why can’t I keep more than 2 buffers in the FIFO queue?

In this simple test case, keeping at most 2 buffers in the FIFO queue often means having to wait a few milliseconds for the returned buffer.

Hi SongjianSu,
This is the design. You don’t need to call return_Frame after every present_Frame. It works like this:

present frame 1
present frame 2
present frame 3
present frame 4
return frame... get frame 3

When you get frame 3 in return_Frame, it means frame 1, frame 2, and frame 3 are returned.

Hi DaneLLL, given a design in which we don’t actually need to call return_Frame after every present_Frame, and in which present_Frame blocks when the EGL stream FIFO is full, I think the following algorithm would be correct:

First, allocate a buffer pool that is 2 larger than the FIFO size; for example, if the FIFO size is 4, we allocate 6 buffers for the pool.
Then we use and present the buffers as a circular queue, i.e., 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, ...

Is this right? And is there a public specification for the CUDA consumer and producer?

Hi SongjianSu,
The fifo mode follows https://www.khronos.org/registry/EGL/extensions/KHR/EGL_KHR_stream_fifo.txt

Hi DaneLLL, I have read this article before, but this is not about CUDA consumer/producer.

Hi SongjianSu,
The CUDA document is at http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EGL.html

There is a sample at ~/NVIDIA_CUDA-8.0_Samples/3_Imaging/EGLStreams_CUDA_Interop

Hi DaneLLL, thanks for the reply.

I have read the ~/NVIDIA_CUDA-8.0_Samples/3_Imaging/EGLStreams_CUDA_Interop sample before; it is too simple: it just passes 2 frames from the producer to the consumer and does not wait for the returned frame.

However, I found a more complicated sample on r28.1, ~/NVIDIA_CUDA-8.0_Samples/3_Imaging/EGLStream_CUDA_CrossGPU, but it does wait for return_Frame after every present_Frame, spinning in a busy loop. That suggests we have to wait for return_Frame after every present_Frame to keep it working correctly.

So now I am very confused, do I need to wait for the return_Frame on every present_Frame or not?

It should work just as I said in #11. You do return_Frame when all frames are presented. If it returns frame 3, it means buffers of frame 1, 2, 3 are returned.

Hi DaneLLL, thanks for the reply.

ISSUE 1:
I have done more experiments: the EGL stream FIFO length is 4, and I initially present 10 frames to the EGL stream FIFO, wait for some time, and then examine the returned frames. No matter how many frames I present initially or how long I wait, only the first 3 frames are returned by the first 3 return_Frame calls, and I never see the later frames returned, for example the 4th, the 7th, etc.

nvidia@tegra-ubuntu:~/program/nveglstreamsrctest$ ./nveglstreamsrctest 
CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 4, latency 0.

NvEglStreamSrcInitializeEgl: Load library: libEGL.so
Connect EGL stream to cuda producer.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
CUDA producer present frame: 0x5c2a00000 used time 0 ms.
CUDA producer present frame: 0x5c2c00000 used time 0 ms.
CUDA producer present frame: 0x5c2e00000 used time 0 ms.
CUDA producer present frame: 0x5c3000000 used time 0 ms.
CUDA producer present frame: 0x5c3200000 used time 0 ms.
CUDA producer present frame: 0x5c3400000 used time 0 ms.
CUDA producer present frame: 0x5c3600000 used time 0 ms.
CUDA producer present frame: 0x5c3800000 used time 0 ms.
============= wait some time =============
Returned frame 0x5c2600000 used time 0 ms.
Present a new frame 1.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
Returned frame 0x5c2800000 used time 0 ms.
Present a new frame 2.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
Returned frame 0x5c2a00000 used time 0 ms.
Present a new frame 3.
CUDA producer present frame: 0x5c2a00000 used time 0 ms.
Returned frame 0x5c2600000 used time 0 ms.
Present a new frame 4.
CUDA producer present frame: 0x5c2600000 used time 5 ms.
Returned frame 0x5c2800000 used time 0 ms.
Present a new frame 5.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
Returned frame 0x5c2a00000 used time 0 ms.
Present a new frame 6.
CUDA producer present frame: 0x5c2a00000 used time 1 ms.
Returned frame 0x5c2600000 used time 0 ms.
Present a new frame 7.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
Returned frame 0x5c2800000 used time 0 ms.
Present a new frame 8.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
Returned frame 0x5c2a00000 used time 0 ms.
Present a new frame 9.
CUDA producer present frame: 0x5c2a00000 used time 0 ms.
Returned frame 0x5c2600000 used time 0 ms.
Present a new frame 10.
CUDA producer present frame: 0x5c2600000 used time 0 ms.

ISSUE 2:
The test case (see attachment) sometimes returns error 999 (CUDA_ERROR_UNKNOWN) while presenting the initial frames, and after that return_Frame always returns 702 (CUDA_ERROR_LAUNCH_TIMEOUT).

nvidia@tegra-ubuntu:~/program/nveglstreamsrctest$ ./nveglstreamsrctest 
CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 4, latency 0.

NvEglStreamSrcInitializeEgl: Load library: libEGL.so
Connect EGL stream to cuda producer.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
CUDA producer present frame: 0x5c2a00000 used time 0 ms.
CUDA producer present frame: 0x5c2c00000 used time 0 ms.
CUDA producer present frame: 0x5c2e00000 used time 0 ms.
CUDA producer present frame: 0x5c3000000 used time 0 ms.
CUDA producer present frame: 0x5c3200000 used time 0 ms.
CUDA producer present frame: 0x5c3400000 used time 0 ms.
CUDA producer present frame: 0x5c3600000 used time 0 ms.
CUDA producer present frame failed: 999.
============= wait some time =============
Returned frame 0x5c2600000 used time 0 ms.
Present a new frame 1.
CUDA producer present frame: 0x5c2600000 used time 1 ms.
Returned frame 0x5c2800000 used time 0 ms.
Present a new frame 2.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
^C   //<-- "ctrl + c", block on cuEGLStreamProducerReturnFrame, always return CUDA_ERROR_LAUNCH_TIMEOUT
nvidia@tegra-ubuntu:~/program/nveglstreamsrctest$

ISSUE 3:
When I posted this thread, I had found the out-of-order issue in my project. To illustrate the issue I built the test case, but it does not completely reproduce the issue.

The steps are the same as in the test case: first present 4 frames to the EGL stream FIFO, then call return_Frame to get a buffer, and then call present_Frame to push it back to the EGL stream FIFO. Since the first 4 return_Frame calls only return the 1st and 2nd buffers, I would expect the 3rd and 4th buffers never to be returned later. But at some later point, the 3rd and 4th buffers are returned.

Here is the key part of the log; for the whole output log, please refer to the attachment. For clarity, I have replaced the 4 different addresses with 1, 2, 3 and 4 respectively.

<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
...
some time later
<../Project/eglframeproducer.cpp, 112>: Return frame 1
<../Project/eglframeproducer.cpp, 112>: Return frame 2
<../Project/eglframeproducer.cpp, 112>: Return frame 3
<../Project/eglframeproducer.cpp, 112>: Return frame 4
...

There seem to be some bugs here. Are there any known bugs, and how can I cope with these issues? Thanks.
nveglstreamsrctest.tar.gz (5.11 KB)
log.txt (113 KB)

Hi SongjianSu,
[s]For FIFO=4, you can only have 4 buffers in your application. If you need 10 buffers, please set FIFO=10. UPDATE nveglstreamsrc supports max buffers=8, so max FIFO=8 also.
and please use the buffers in ring mode:
present frame 1
present frame 2
present frame 3
present frame 4 // app owns no frame
return frame… get frame 3 // app owns frame 1 2 3
present frame 1
present frame 2
present frame 3 // app owns no frame
return frame… get frame 2 // app owns frame 4 1 2
present frame 4
present frame 1
present frame 2 // app owns no frame
return frame… get frame 4 // app owns frame 3 4

We have customers run it successfully. If you cannot get it work, you should check your code. We have helped debug your code in #8, #9, and cannot always do the favor.[/s]

UPDATE The above comment is wrong. Please refer to #9 https://devtalk.nvidia.com/default/topic/1023481/jetson-tx2/frames-returned-from-nveglstreamsrc-via-egl-stream-out-of-order/post/5209249/#5209249

I am sad to hear that, but I think I am helping by reporting the bug. As you suggested, I set FIFO=10 and presented 10 buffers; it is still not working, and the pipeline reports this error message:
0:00:00.114342482 6205 0x656320 ERROR nveglstream gstnveglstreamsrc.c:570:gst_nvconsumer_buffer_pool_release_buffer: Failed to release EGLStream Frame.

nvidia@tegra-ubuntu:~/program/nveglstreamsrctest$ GST_DEBUG=3 ./nveglstreamsrctest 
CUDA producer initializing EGL display.
EGL API: 1.5
CUDA producer initializing EGL stream.
EGL Stream consumer - Mode: FIFO, Length: 10, latency 0.

NvEglStreamSrcInitializeEgl: Load library: libEGL.so
0:00:00.056235207  6205       0x656320 FIXME                default gstutils.c:3766:gst_pad_create_stream_id_internal:<nveglstreamsrc0:src> Creating random stream-id, consider implementing a deterministic way of creating a stream-id
Connect EGL stream to cuda producer.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
CUDA producer present frame: 0x5c2a00000 used time 0 ms.
CUDA producer present frame: 0x5c2c00000 used time 0 ms.
CUDA producer present frame: 0x5c2e00000 used time 0 ms.
CUDA producer present frame: 0x5c3000000 used time 0 ms.
CUDA producer present frame: 0x5c3200000 used time 0 ms.
CUDA producer present frame: 0x5c3400000 used time 0 ms.
CUDA producer present frame failed: 999.
============= wait some time =============
Returned frame 0x5c2600000 used time 0 ms.
0:00:00.114223154  6205       0x656320 ERROR            nveglstream gstnveglstreamsrc.c:570:gst_nvconsumer_buffer_pool_release_buffer:<nveglstreamsrc0> Failed to release EGLStream Frame.
0:00:00.114298386  6205       0x656320 ERROR            nveglstream gstnveglstreamsrc.c:570:gst_nvconsumer_buffer_pool_release_buffer:<nveglstreamsrc0> Failed to release EGLStream Frame.
0:00:00.114342482  6205       0x656320 ERROR            nveglstream gstnveglstreamsrc.c:570:gst_nvconsumer_buffer_pool_release_buffer:<nveglstreamsrc0> Failed to release EGLStream Frame.
Present a new frame 1.
CUDA producer present frame: 0x5c2600000 used time 0 ms.
Returned frame 0x5c2800000 used time 0 ms.
Present a new frame 2.
CUDA producer present frame: 0x5c2800000 used time 0 ms.
^C
nvidia@tegra-ubuntu:~/program/nveglstreamsrctest$