Opencv gpu mat into GStreamer without downloading to cpu

I mentioned Pong above, and I finally played around and made a poor one-player version of it, just as an example of drawing with OpenCV CUDA.
The design and gameplay are poor, but this is just a fun example; anyone is welcome to improve it, for example by adding a second player controlled by an AI…
You would just need an opencv version with CUDA enabled (tested with opencv-4.5.3).

Opencv CUDA NVMM gstreamer appsrc one-player mouse-controlled PONG

poorpong_nvmm.cpp
#include <cstdlib>
#include <gst/gst.h>
#include <gst/gstinfo.h>
#include <gst/app/gstappsrc.h>
#include <glib-unix.h>
#include <dlfcn.h>
#include <stropts.h>
#include <poll.h>

#include <cstring>
#include <iostream>
#include <sstream>
#include <thread>
#include <vector>

#include "nvbuf_utils.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudaEGL.h>
#include "X11/Xlib.h"

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"

using namespace std;

// Evaluates its argument and discards the result by casting it to void;
// main() applies it to argc and argv.
#define USE(x) ((void)(x))


// Snapshot of everything feed_function needs to draw one frame.
// Instances are allocated with g_malloc by main() and released with g_free
// by feed_function, so the type stays a plain aggregate of ints.
struct pongDisplayData {
    int left_bar_ypos;  // y of the bar's top edge, in frame pixels
    int ball_xpos;      // x of the ball's top-left corner, in frame pixels
    int ball_ypos;      // y of the ball's top-left corner, in frame pixels
    int digits_xpos;    // x of the left edge of the two-digit counter
    int count10_idx;    // index into the digit masks for the tens digit
    int count1_idx;     // index into the digit masks for the units digit
};


// Size in pixels of the frame that is allocated, drawn and pushed to GStreamer.
const int frame_width = 1920;
const int frame_height = 1080;

// Side of the square ball mask, in pixels.
const int ball_size = 50;
// Initial horizontal ball speed in pixels per frame; main() starts the
// vertical speed at one fifth of this value.
const int initial_speed = 30;

// Bar (paddle) dimensions in pixels.
const int bar_width = 50;
const int bar_height = 300;

// Size of one counter digit mask; the width is 60% of the height.
const int digit_height=200;
const int digit_width=(digit_height*60)/100;

// R, G, B, A
// Channel order as stated by the original author; it is expected to match the
// RGBA caps / ABGR32 NvBuffer format used elsewhere in this file -- TODO confirm.
const cv::Scalar background_color(0,0,255,255);
const cv::Scalar ball_color(255,255,0,255);
const cv::Scalar bar_color(255,0,0,255);
const cv::Scalar digits_color(0,255,0,255);


// Pipeline built by main() from launch_string with gst_parse_launch.
static GstPipeline *gst_pipeline = nullptr;
// Textual pipeline description assembled in main().
static string launch_string;
// The appsrc element ("mysource") that feed_function pushes buffers into.
static GstElement *appsrc_;
// PTS/DTS stamped on the next pushed buffer; feed_function advances it by one
// frame period (33333333) on every call.
GstClockTime timestamp = 0;

// EGL display initialised in main() and used by feed_function to create and
// destroy EGL images for each NvBuffer.
EGLDisplay egl_display;


// X display size in pixels, read in main(); display_h is used by GetPointer_Y
// to scale the pointer position into frame coordinates.
static int display_w;
static int display_h;

// Queries the X pointer on `root_window` and converts its window-relative Y
// coordinate from display space into the frame-space Y of the bar's top edge
// (range scaled by frame_height - bar_height over display_h).
// Returns the scaled value, or -1 when XQueryPointer returns False.
static int GetPointer_Y (Display *display, Window *root_window) {
    Window root_ret, child_ret;
    int root_x, root_y, win_x, win_y;
    unsigned int button_mask;

    const bool found = XQueryPointer(display, *root_window, &root_ret, &child_ret,
                                     &root_x, &root_y, &win_x, &win_y, &button_mask);
    if (!found)
        return (-1);

    // Scale from display to frame
    const double ratio = win_y/(double)display_h;
    return (int)((frame_height - bar_height)*ratio);
}

// Single-channel 8-bit masks held in GPU memory. PrepareMasks() uploads their
// contents; feed_function uses them as the mask argument of GpuMat::setTo.
static cv::cuda::GpuMat d_ball_mask(ball_size, ball_size, CV_8UC1);
static cv::cuda::GpuMat d_bar_mask(bar_height, bar_width, CV_8UC1);
// One mask per decimal digit, indexed by the digit value 0..9.
static std::vector< cv::cuda::GpuMat > d_numbers_mask(10);

// Builds the bar, ball and digit masks on the host and uploads them to the
// GPU masks d_bar_mask, d_ball_mask and d_numbers_mask[0..9].
// Must run before the first call to feed_function.
static void PrepareMasks(void)
{
    // Bar mask: every pixel selected
    cv::Mat h_bar_mask(bar_height, bar_width, CV_8UC1, cv::Scalar(255));
    d_bar_mask.upload(h_bar_mask);

    // Ball mask: filled circle inscribed in the ball_size square
    cv::Mat h_ball_mask = cv::Mat::zeros(ball_size, ball_size, CV_8UC1);
    cv::circle(h_ball_mask, cv::Point(ball_size/2, ball_size/2), ball_size/2, cv::Scalar(255), cv::FILLED, 8, 0);
    d_ball_mask.upload(h_ball_mask);

    // [0-9] digits masks
    for (unsigned int digit = 0; digit < 10; ++digit)
    {
        // Build the one-character string directly instead of going through
        // sprintf("%d") with an unsigned argument and a 2-byte buffer.
        const char text[2] = { static_cast<char>('0' + digit), '\0' };

        cv::Mat h_digit_mask = cv::Mat::zeros(digit_height, digit_width, CV_8UC1);
        // Integer divisions are intentional: font scale digit_height/40 and
        // stroke thickness digit_height/10.
        cv::putText (h_digit_mask, text, cv::Point (0,digit_width), cv::FONT_HERSHEY_SIMPLEX, digit_height/40, cv::Scalar(255), digit_height/10);
        d_numbers_mask[digit].upload(h_digit_mask);
    }
}


// Release callback registered with gst_buffer_new_wrapped_full.
// `user_data` points to a g_malloc'd allocation that starts with the int
// dmabuf fd of the frame: the NvBuffer is destroyed, then the allocation freed.
static void notify_to_destroy (gpointer user_data)
{
    int *fd_holder = (int *)user_data;

    GST_INFO ("NvBufferDestroy(%d)", *fd_holder);
    NvBufferDestroy(*fd_holder);
    g_free(fd_holder);
}


static gboolean feed_function(gpointer d) {
    GstBuffer *buffer;
    GstFlowReturn ret;
    GstMapInfo map = {0};
    int dmabuf_fd = 0;
    gpointer data = NULL, user_data = NULL;
    NvBufferParams par;
    GstMemoryFlags flags = (GstMemoryFlags)0;

    {
        static int frame_num=0;

        NvBufferCreate(&dmabuf_fd, frame_width, frame_height, NvBufferLayout_Pitch, NvBufferColorFormat_ABGR32);
        //CUDA process
        {
            EGLImageKHR egl_image;
            egl_image = NvEGLImageFromFd(egl_display, dmabuf_fd);
            CUresult status;
            CUeglFrame eglFrame;
            CUgraphicsResource pResource = NULL;
            cudaFree(0);
            status = cuGraphicsEGLRegisterImage(&pResource,
                                                egl_image,
                                                CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
            if (status != CUDA_SUCCESS)
            {
                printf("cuGraphicsEGLRegisterImage failed: %d \n",status);
            }
            status = cuGraphicsResourceGetMappedEglFrame(&eglFrame, pResource, 0, 0);
            status = cuCtxSynchronize();

            // CUDA code here
            {
                cv::cuda::GpuMat dmat(frame_height, frame_width,CV_8UC4,eglFrame.frame.pPitch[0]);
                pongDisplayData *data = (pongDisplayData*) d;

                // Set background
                dmat.setTo(background_color);

                // Draw Count digits
                {
                    cv::cuda::GpuMat roi(dmat, cv::Rect(cv::Point(data->digits_xpos, 10), cv::Size(digit_width, digit_height)));
                    roi.setTo(digits_color, d_numbers_mask[data->count10_idx]);
                }
                {
                    cv::cuda::GpuMat roi(dmat, cv::Rect(cv::Point(data->digits_xpos + digit_width, 10), cv::Size(digit_width, digit_height)));
                    roi.setTo(digits_color, d_numbers_mask[data->count1_idx]);
                }

                // Draw left bar
                {
                    cv::cuda::GpuMat roi(dmat, cv::Rect(cv::Point(50, data->left_bar_ypos), cv::Size(bar_width, bar_height)));
                    roi.setTo(bar_color, d_bar_mask);
                }

                // Draw ball
                {
                    cv::cuda::GpuMat roi(dmat, cv::Rect(data->ball_xpos, data->ball_ypos, ball_size, ball_size));
                    roi.setTo(ball_color, d_ball_mask);
                }

                // Safety check
                if (dmat.data != eglFrame.frame.pPitch[0])
                    fprintf (stderr, "Error: re-allocated dmat\n");
            }

            status = cuCtxSynchronize();
            status = cuGraphicsUnregisterResource(pResource);
            NvDestroyEGLImage(egl_display, egl_image);
        }
        user_data = g_malloc(sizeof(int));
        GST_INFO ("NvBufferCreate %d", dmabuf_fd);
        *(int *)user_data = dmabuf_fd;

        NvBufferGetParams (dmabuf_fd, &par);
        data = g_malloc(par.nv_buffer_size);

        buffer = gst_buffer_new_wrapped_full(flags,
                                             data,
                                             par.nv_buffer_size,
                                             0,
                                             par.nv_buffer_size,
                                             user_data,
                                             notify_to_destroy);

        GST_BUFFER_PTS(buffer) = timestamp;
        GST_BUFFER_DTS(buffer) = timestamp;
        GST_BUFFER_OFFSET(buffer) = frame_num++;
        GST_BUFFER_DURATION(buffer) = ((double)1/30) * GST_SECOND;

        gst_buffer_map (buffer, &map, GST_MAP_WRITE);
        memcpy(map.data, par.nv_buffer, par.nv_buffer_size);
        gst_buffer_unmap(buffer, &map);

        g_signal_emit_by_name (appsrc_, "push-buffer", buffer, &ret);
        gst_buffer_unref(buffer);
    }

    // Free pongDisplayData
    g_free(d);

    timestamp += 33333333;
    return G_SOURCE_CONTINUE;
}




// Game state advanced once per frame by PoorPongController_onePlayer.
// Positions are in frame pixels, speeds in pixels per frame.
struct pongCtrlData {
    int count;            // number of successful bar hits so far
    int left_bar_ypos;    // y of the bar's top edge
    int left_bar_yspeed;  // bar displacement since the previous frame
    int ball_xpos;        // x of the ball's top-left corner
    int ball_ypos;        // y of the ball's top-left corner
    int ball_xspeed;      // horizontal ball speed
    int ball_yspeed;      // vertical ball speed
};



// Advances the one-player game by one frame.
//
// Projects the ball one step ahead and then either bounces it off the bar
// (incrementing the count and speeding the ball up by 10%), ends the game when
// it would leave the frame on the left, bounces it off the right/top/bottom
// edges, or simply moves it.
//
// ctrlData: game state, updated in place.
// Returns 0 while the game goes on, -1 on game over (ball missed on the left).
int PoorPongController_onePlayer(pongCtrlData& ctrlData) {
    // x of the bar's left edge; matches the x at which feed_function draws it
    const int bar_xpos = 50;

    // ball projection for next step
    const int ball_xnext = ctrlData.ball_xpos + ctrlData.ball_xspeed;
    const int ball_ynext = ctrlData.ball_ypos + ctrlData.ball_yspeed;

    // Only a ball travelling toward the bar can hit it. Without this check a
    // slow ball that is still inside the bar zone on the frame after a bounce
    // would be reversed again, sent back to the left and counted as a new hit.
    const bool moving_left = ctrlData.ball_xspeed < 0;
    const bool in_bar_columns = ball_xnext < bar_xpos + bar_width;
    const bool in_bar_rows = (ball_ynext + ball_size > ctrlData.left_bar_ypos)
                          && (ball_ynext < ctrlData.left_bar_ypos + bar_height);

    if (moving_left && in_bar_columns && in_bar_rows) {
        // Ball is in bar zone... reverse xspeed
        ctrlData.ball_xspeed = -ctrlData.ball_xspeed;
        // Set new xpos
        // NOTE(review): this is bar_xpos plus the distance travelled, i.e.
        // measured from the bar's left edge rather than its right face; kept
        // as in the original gameplay.
        ctrlData.ball_xpos = bar_xpos + (ctrlData.ball_xpos - ball_xnext);
        // Add 50% of bar yspeed
        ctrlData.ball_yspeed += (int)(0.5*ctrlData.left_bar_yspeed);
        // Increase count and both speeds by 10%
        ++ctrlData.count;
        ctrlData.ball_xspeed = (int)(1.1*ctrlData.ball_xspeed);
        ctrlData.ball_yspeed = (int)(1.1*ctrlData.ball_yspeed);
    }
    else if (ball_xnext < 0) {
        // Ball reached left of the frame...Game over
        return -1;
    }
    else if (ball_xnext > frame_width - ball_size) {
        // Ball reached right of the frame... reverse xspeed
        // (the left edge was already handled by the game-over branch above)
        ctrlData.ball_xspeed = -ctrlData.ball_xspeed;
    }
    else {
        // Ball moved on x, update
        ctrlData.ball_xpos = ball_xnext;
    }

    if ((ball_ynext > frame_height - ball_size) || (ball_ynext < 0)) {
        // Ball reached top or bottom of the frame... reverse yspeed
        ctrlData.ball_yspeed = -ctrlData.ball_yspeed;
    }
    else {
        // Ball moved on y, update
        ctrlData.ball_ypos = ball_ynext;
    }
    return 0;
}

int main(int argc, char** argv) {
    USE(argc);
    USE(argv);

    Display *display;
    display = XOpenDisplay(0);
    display_w = DisplayWidth(display, 0);
    display_h = DisplayHeight(display, 0);

    Window root_window;
    root_window = XRootWindow(display, 0);


    egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
    eglInitialize(egl_display, NULL, NULL);

    PrepareMasks();

    gst_init (&argc, &argv);
    GMainLoop *main_loop;
    main_loop = g_main_loop_new (NULL, FALSE);
    ostringstream launch_stream;
    launch_stream
            << "appsrc name=mysource ! "
            << "video/x-raw(memory:NVMM),width="<< frame_width <<",height="<< frame_height <<",framerate=30/1,format=RGBA ! "
            << "nvegltransform ! nveglglessink";
    //<< "nvvidconv ! video/x-raw(memory:NVMM),format=NV12 ! nvoverlaysink";
    //<< "nvvidconv ! video/x-raw(memory:NVMM),format=NV12 ! nv3dsink";
    //<< "nvvidconv ! video/x-raw, format=YUY2 ! xvimagesink";

    launch_string = launch_stream.str();
    g_print("Using launch string: %s\n", launch_string.c_str());

    GError *error = nullptr;
    gst_pipeline  = (GstPipeline*) gst_parse_launch(launch_string.c_str(), &error);

    if (gst_pipeline == nullptr) {
        g_print( "Failed to parse launch: %s\n", error->message);
        return -1;
    }
    if(error) g_error_free(error);

    appsrc_ = gst_bin_get_by_name(GST_BIN(gst_pipeline), "mysource");
    gst_app_src_set_stream_type(GST_APP_SRC(appsrc_), GST_APP_STREAM_TYPE_STREAM);

    gst_element_set_state((GstElement*)gst_pipeline, GST_STATE_PLAYING);


    // Set initial state
    pongCtrlData ctrlData;
    ctrlData.count = 0;
    ctrlData.left_bar_ypos = GetPointer_Y(display, &root_window);
    ctrlData.left_bar_yspeed = 0;
    ctrlData.ball_xpos = 50 + bar_width;
    ctrlData.ball_ypos = ctrlData.left_bar_ypos + bar_height/2;
    ctrlData.ball_xspeed = initial_speed;
    ctrlData.ball_yspeed = initial_speed/5;

    while (1) {
        double startTime = (double)cv::getTickCount();

        // Get pointer Y pos
        int ret = GetPointer_Y(display, &root_window);
        if (ret >= 0) {
            ctrlData.left_bar_yspeed = ret - ctrlData.left_bar_ypos;
            ctrlData.left_bar_ypos = ret;
        }

        // Update state
        ret = PoorPongController_onePlayer(ctrlData);
        if (ret < 0)
            break;

        // Set display data
        pongDisplayData *dispData = (pongDisplayData *)g_malloc(sizeof(pongDisplayData));
        dispData->left_bar_ypos = ctrlData.left_bar_ypos;
        dispData->ball_xpos = ctrlData.ball_xpos;
        dispData->ball_ypos = ctrlData.ball_ypos;
        dispData->digits_xpos = (int)(frame_width/2 - digit_width);
        dispData->count10_idx =  (ctrlData.count/10)%10; // supports only up to 99
        dispData->count1_idx =  ctrlData.count%10;

        // Draw into NVMM frame
        feed_function(dispData);

        // Compute processing + drawing time, and sleep until next frame if we're early
        double endTime = (double)cv::getTickCount();
        double process_time_us = ((endTime - startTime)/cv::getTickFrequency())*1e6;
        double sleep_us = (33333.3 - process_time_us)/1.01;
        if (sleep_us >= 0)
            usleep(sleep_us);
        else
            printf("Late...\n");
    }

    /* Game over... show last scene for 2 seconds before exit */
    for (int i = 0; i < 60; i++)
    {
        double startTime = (double)cv::getTickCount();
        pongDisplayData *dispData = (pongDisplayData *)g_malloc(sizeof(pongDisplayData));
        dispData->left_bar_ypos = ctrlData.left_bar_ypos;
        dispData->ball_xpos = ctrlData.ball_xpos;
        dispData->ball_ypos = ctrlData.ball_ypos;
        dispData->digits_xpos = (int)(frame_width/2 - digit_width);
        dispData->count10_idx =  (ctrlData.count/10)%10; // supports only up to 99
        dispData->count1_idx =  ctrlData.count%10;

        // Draw frame
        feed_function(dispData);

        // Sleep until next frame if we're early
        double endTime = (double)cv::getTickCount();
        double process_time_us = ((endTime - startTime)/cv::getTickFrequency())*1e6;
        double sleep_us = (33333.3 - process_time_us)/1.01;
        if (sleep_us >= 0)
            usleep(sleep_us);
        else
            printf("Late...\n");

    }

    printf("Game Over - Count: %d\n", ctrlData.count);

    // Wait for EOS message
    gst_element_send_event ((GstElement*)gst_pipeline, gst_event_new_eos ());
    GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(gst_pipeline));
    gst_bus_poll(bus, GST_MESSAGE_EOS, GST_CLOCK_TIME_NONE);


    gst_element_set_state((GstElement*)gst_pipeline, GST_STATE_NULL);
    gst_object_unref(GST_OBJECT(gst_pipeline));
    g_main_loop_unref(main_loop);
    eglTerminate(egl_display);

    g_print("going to exit \n");
    return 0;
}
Makefile
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#################################################################################

# Builds the poorpong_nvmm example.
# Override the CUDA toolkit version or the OpenCV install prefix on the command
# line if needed, e.g.:  make CUDA_VER=10.2 OPENCV_DIR=/usr/local

APP:= poorpong_nvmm
CUDA_VER?=10.2
OPENCV_DIR=/usr/local

ifeq ($(CUDA_VER),)
  $(error "CUDA_VER is not set")
endif
CXX:= g++
SRCS:= poorpong_nvmm.cpp

# Compile flags: Jetson multimedia API, CUDA and OpenCV 4 headers
CFLAGS:= -Wall -std=c++11 -ggdb \
        -I/usr/src/jetson_multimedia_api/include \
        -I/usr/local/cuda-$(CUDA_VER)/include \
        -I$(OPENCV_DIR)/include/opencv4

# Link flags only; compile options such as -Wall/-std belong in CFLAGS
LIBS:= \
        -L/usr/lib/aarch64-linux-gnu/tegra/ -lEGL -lGLESv2 \
        -L/usr/lib/aarch64-linux-gnu/tegra/ -lcuda -lnvbuf_utils \
        -L/usr/local/cuda-$(CUDA_VER)/lib64/ -lcudart \
        -L$(OPENCV_DIR)/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lX11

OBJS:= $(SRCS:.cpp=.o)

# GStreamer flags come from pkg-config. The backticks are expanded by the
# shell when a recipe runs, so the command inside them must be valid shell
# (the previous version had a stray ')' after $(PKGS) that broke it).
PKGS:= gstreamer-app-1.0
CFLAGS+= `pkg-config --cflags $(PKGS)`
LIBS+= `pkg-config --libs $(PKGS)`

.PHONY: all clean

all: $(APP)

%.o: %.cpp
	@echo "Compiling: $<"
	$(CXX) $(CFLAGS) -c $< -o $@

$(APP): $(OBJS)
	@echo "Linking: $@"
	$(CXX) -o $@ $(OBJS) $(CFLAGS) $(LIBS)

clean:
	rm -rf $(OBJS) $(APP)

Have fun !

1 Like