Hi,
While extracting a minimal sample code I found out that the memory bloat does not come from consuming the EGLStream with CUDA, but from the CUDA-OpenGL interoperability (maybe I am misusing it).
src/window.hpp:
#pragma once
#include <EGL/egl.h>
namespace window {
struct Window; // X11 conflict outside namespace ..
/// returns NULL on failure
Window* create(
uint32_t width, uint32_t height,
const char* title
);
void destroy(Window* w);
/// returns false on failure
bool poll_events(Window* w);
void request_exit(Window* w);
bool should_close(const Window* w);
EGLNativeDisplayType get_native_display(const Window* w);
EGLNativeWindowType get_native_window(const Window* w);
};
src/window_x11.cpp:
#include "window.hpp"
#include <X11/Xlib.h>
#include <X11/Xutil.h>
using XDisplay = Display;
using XWindow = Window;
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <signal.h>
namespace window {
struct Window {
XDisplay* display;
XWindow window;
uint32_t width;
uint32_t height;
};
static bool exit_requested = false;
static
void signal_exit(int signo) {
exit_requested = true;
}
Window* create(
uint32_t width, uint32_t height,
const char* title
) {
// catch interruptions
if (signal(SIGINT, signal_exit) == SIG_ERR) {
fprintf(stderr, "Could not register SIGINT handler\n");
return NULL;
}
if (signal(SIGTSTP, signal_exit) == SIG_ERR) {
fprintf(stderr, "Could not register SIGTSTP handler\n");
return NULL;
}
// XInitThreads();
// should not be necessary, we will be pinned to one thread
XDisplay* display = XOpenDisplay(NULL);
if (!display) {
fprintf(stderr, "Could not open X display "
"(is the DISPLAY environment variable set?)\n");
return NULL;
}
XWindow root = DefaultRootWindow(display);
if (root == None) {
fprintf(stderr, "Could not get default root window\n");
return NULL;
}
XSetWindowAttributes swa;
swa.event_mask = KeyPressMask | StructureNotifyMask;
XWindow window = XCreateWindow(
display, root,
0, 0, width, height, 0,
CopyFromParent, CopyFromParent, CopyFromParent,
CWEventMask, &swa
);
if (window == None) {
fprintf(stderr, "Could not create window\n");
return NULL;
}
Atom wm_delete = XInternAtom(display, "WM_DELETE_WINDOW", 1);
if (wm_delete == None) {
fprintf(stderr, "Could not get WM_DELETE_WINDOW\n");
return NULL;
}
if (XSetWMProtocols(display, window, &wm_delete, 1) == 0) {
fprintf(stderr, "Could not set WM protocols\n");
return NULL;
}
XStoreName(display, window, title);
XMapWindow(display, window);
return new Window {
.display = display,
.window = window,
.width = width,
.height = height,
};
}
void destroy(Window* w) {
XDestroyWindow(w->display, w->window);
XCloseDisplay(w->display);
delete w;
}
bool poll_events(Window* w) {
while (XPending(w->display)) {
XEvent event;
XNextEvent(w->display, &event);
if (event.xany.window != w->window) {
continue;
}
if (event.type == ConfigureNotify) {
uint32_t width = event.xconfigure.width;
uint32_t height = event.xconfigure.height;
if ((width != w->width) || (height != w->height)) {
w->width = width;
w->height = height;
// TODO: resize handling
}
} else if (event.type == KeyPress) {
// TODO?
// event.xkey.state
// ShiftMask, LockMask, ControlMask,
// Mod1/2/3/4/5Mask
// Button1/2/3/4/5Mask
int keysym_per_keycode;
KeySym* keysyms = XGetKeyboardMapping(w->display,
event.xkey.keycode, 1, &keysym_per_keycode);
if (keysyms) {
printf("Received key press: %s\n",
XKeysymToString(keysyms[0]));
XFree(keysyms);
}
} else if (event.type == ClientMessage) {
if (strcmp(XGetAtomName(w->display, event.xclient.message_type),
"WM_PROTOCOLS") == 0) {
request_exit(w);
return true;
}
}
}
return true;
}
void request_exit(Window* w) {
exit_requested = true;
}
bool should_close(const Window* w) {
return exit_requested;
}
EGLNativeDisplayType get_native_display(const Window* w) {
return w->display;
}
EGLNativeWindowType get_native_window(const Window* w) {
return w->window;
}
};
src/graphics.hpp:
#pragma once
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#include <EGL/egl.h>
#include <EGL/eglext.h>
#define EGL_EXTN_FUNC_LIST \
EGL_EXTN_FUNC(PFNEGLCREATESTREAMKHRPROC, eglCreateStreamKHR) \
EGL_EXTN_FUNC(PFNEGLDESTROYSTREAMKHRPROC, eglDestroyStreamKHR) \
EGL_EXTN_FUNC(PFNEGLSTREAMATTRIBKHRPROC, eglStreamAttribKHR) \
EGL_EXTN_FUNC(PFNEGLQUERYSTREAMKHRPROC, eglQueryStreamKHR) \
\
EGL_EXTN_FUNC(PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC, eglStreamConsumerGLTextureExternalKHR) \
EGL_EXTN_FUNC(PFNEGLSTREAMCONSUMERACQUIREKHRPROC, eglStreamConsumerAcquireKHR) \
EGL_EXTN_FUNC(PFNEGLSTREAMCONSUMERRELEASEKHRPROC, eglStreamConsumerReleaseKHR) \
\
EGL_EXTN_FUNC(PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC, eglCreateStreamProducerSurfaceKHR)
//
// EGL_EXTN_FUNC(PFNEGLDESTROYSYNCKHRPROC, eglDestroySyncKHR)
// EGL_EXTN_FUNC(PFNEGLCLIENTWAITSYNCKHRPROC, eglClientWaitSyncKHR)
// EGL_EXTN_FUNC(PFNEGLSIGNALSYNCKHRPROC, eglSignalSyncKHR)
#define EGL_EXTN_FUNC(_type, _name) extern _type _name;
EGL_EXTN_FUNC_LIST
#undef EGL_EXTN_FUNC
struct Graphics {
EGLDisplay egl_display;
EGLSurface egl_surface;
EGLContext egl_context;
};
namespace window {
struct Window;
};
namespace graphics {
bool init(const window::Window* window, Graphics* g);
void destroy(Graphics* g);
void swap_buffers(Graphics* g);
bool stream_query_new_frame(
EGLStreamKHR stream, Graphics* g, bool* new_frame
);
struct StreamRender {
EGLStreamKHR stream;
GLuint texture_program;
GLuint texture;
};
bool init_stream_render(
Graphics* g, EGLStreamKHR stream, StreamRender* sr
);
void destroy_stream_render(StreamRender* sr);
void render_stream_frame(StreamRender* sr, Graphics* g);
struct TextureRender {
GLuint program;
};
bool init_texture_render(Graphics* g, TextureRender* tr);
void destroy_texture_render(TextureRender* tr);
void render_texture(TextureRender* tr, GLuint texture, Graphics* g);
};
src/graphics.cpp:
#include "graphics.hpp"
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <array>
#include <unistd.h>
#include <window.hpp>
namespace graphics {
static
bool choose_egl_config(EGLDisplay display, EGLConfig* out) {
static const EGLint attribute_list[] = {
// EGL_CONFORMANT,
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
EGL_SURFACE_TYPE, 0x0,
EGL_NONE
};
const size_t CONFIGS_BUFLEN = 128;
EGLConfig configs[CONFIGS_BUFLEN];
EGLint config_count;
if (!eglChooseConfig(
display, attribute_list,
configs, CONFIGS_BUFLEN, &config_count)) {
fprintf(stderr, "Could not query EGL configs\n");
return false;
}
for (EGLint c = 0; c < config_count; c++) {
EGLint attr;
#define SELECT_ATTR(enum, value) \
eglGetConfigAttrib(display, configs[c], enum, &attr); \
if (attr != value) continue;
SELECT_ATTR(EGL_BUFFER_SIZE, 32);
SELECT_ATTR(EGL_DEPTH_SIZE, 24);
SELECT_ATTR(EGL_SAMPLES, 0);
#undef SELECT_ATTR
*out = configs[c];
return true;
}
fprintf(stderr, "No suitable EGL config found\n");
return false;
}
#define EGL_EXTN_FUNC(_type, _name) _type _name = NULL;
EGL_EXTN_FUNC_LIST
#undef EGL_EXTN_FUNC
bool init(const window::Window* window, Graphics* g) {
g->egl_display = eglGetDisplay(window::get_native_display(window));
if (g->egl_display == EGL_NO_DISPLAY) {
fprintf(stderr, "Could not get EGL display\n");
// Could try to use EXT_platform_device
return false;
}
if (!eglInitialize(g->egl_display, NULL, NULL)) {
fprintf(stderr, "Could not initialize EGL display\n");
return false;
}
// check extensions
const char* egl_extensions =
eglQueryString(g->egl_display, EGL_EXTENSIONS);
#define CHECK(name) \
if (!strstr(egl_extensions, #name)) { \
fprintf(stderr, #name " not supported\n"); \
return false; \
}
CHECK(EGL_KHR_stream);
CHECK(EGL_KHR_stream_consumer_gltexture);
CHECK(EGL_KHR_stream_producer_eglsurface);
// CHECK(EGL_KHR_fence_sync);
// CHECK(EGL_KHR_reusable_sync);
#undef CHECK
// get function pointers
#define EGL_EXTN_FUNC(_type, _name) \
_name = (_type)eglGetProcAddress(#_name); \
if (!_name) { \
fprintf(stderr, "Failed to get function:" #_name "\n"); \
return false; \
}
EGL_EXTN_FUNC_LIST
#undef EGL_EXTN_FUNC
/////////
EGLConfig egl_config;
if (!choose_egl_config(g->egl_display, &egl_config)) {
return false;
}
g->egl_surface = eglCreateWindowSurface(
g->egl_display, egl_config,
window::get_native_window(window), NULL
);
if (g->egl_surface == EGL_NO_SURFACE) {
fprintf(stderr, "Could not create EGL surface\n");
return false;
}
static const EGLint context_attribs[] = {
EGL_CONTEXT_CLIENT_VERSION, 3,
EGL_NONE
};
g->egl_context = eglCreateContext(
g->egl_display, egl_config, EGL_NO_CONTEXT, context_attribs);
if (g->egl_context == EGL_NO_CONTEXT) {
fprintf(stderr, "Could not create EGL context\n");
return false;
}
if (!eglMakeCurrent(g->egl_display,
g->egl_surface, g->egl_surface,
g->egl_context)) {
fprintf(stderr, "Could not make EGL context current\n");
return false;
}
return true;
}
void destroy(Graphics* g) {
eglDestroyContext(g->egl_display, g->egl_context);
eglReleaseThread();
eglDestroySurface(g->egl_display, g->egl_surface);
eglTerminate(g->egl_display);
}
void swap_buffers(Graphics* g) {
eglSwapBuffers(g->egl_display, g->egl_surface);
}
static
bool create_shader(GLenum type, const char* src, GLuint* shader_out) {
GLuint shader = glCreateShader(type);
if (!shader) {
fprintf(stderr, "Unable to create shader\n");
return false;
}
glShaderSource(shader, 1, &src, NULL);
glCompileShader(shader);
GLint compile_status;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);
if (compile_status != GL_TRUE) {
GLchar info[1024];
glGetShaderInfoLog(shader, sizeof(info), NULL, info);
fprintf(stderr, "%s shader compilation failed:\n%s",
(type == GL_VERTEX_SHADER ? "Vertex" : "Fragment"),
info);
glDeleteShader(shader);
return false;
}
*shader_out = shader;
return true;
}
static
bool create_program(
const char* vertex_src,
const char* fragment_src,
GLuint* program_out
) {
bool ok = false;
GLuint program = glCreateProgram();
if (!program) {
fprintf(stderr, "Unable to create program\n");
goto exit;
}
GLuint vertex_shader, fragment_shader;
if (!create_shader(GL_VERTEX_SHADER, vertex_src, &vertex_shader)) {
goto exit_program;
}
if (!create_shader(GL_FRAGMENT_SHADER, fragment_src, &fragment_shader)) {
goto exit_vertex;
}
glAttachShader(program, vertex_shader);
glAttachShader(program, fragment_shader);
glLinkProgram(program);
GLint status;
glGetProgramiv(program, GL_LINK_STATUS, &status);
if (status != GL_TRUE) {
GLchar info[1024];
glGetProgramInfoLog(program, sizeof(info), NULL, info);
fprintf(stderr, "Program linking failed with log:\n%s\n", info);
goto exit_fragment;
}
*program_out = program;
ok = true;
exit_fragment:
glDeleteShader(fragment_shader);
exit_vertex:
glDeleteShader(vertex_shader);
exit_program:
if (!ok) glDeleteProgram(program);
exit:
return ok;
}
bool stream_query_new_frame(
EGLStreamKHR stream, Graphics* g, bool* new_frame
) {
EGLint state;
if (!eglQueryStreamKHR(g->egl_display, stream,
EGL_STREAM_STATE_KHR, &state)) {
fprintf(stderr, "Failed to query stream state\n");
return false;
}
if (state == EGL_BAD_STREAM_KHR || state == EGL_BAD_STATE_KHR) {
fprintf(stderr, "EGL stream is in bad state\n");
return false;
}
if (state == EGL_STREAM_STATE_DISCONNECTED_KHR) {
fprintf(stderr, "EGL stream disconnected\n");
return false;
}
*new_frame = (state == EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR);
return true;
}
// shader program to render a texture
static const GLfloat quad_coords[] =
{1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f};
bool init_stream_render(
Graphics* g, EGLStreamKHR stream, StreamRender* sr
) {
sr->stream = stream;
static const char vertex_src[] =
"#version 300 es\n"
"in layout(location = 0) vec2 coord;\n"
"out vec2 texCoord;\n"
"void main() {\n"
" gl_Position = vec4((coord * 2.0) - 1.0, 0.0, 1.0);\n"
// top-left origin need to be inverted for GL texture use.
" texCoord = vec2(coord.x, 1.0 - coord.y);\n"
"}\n";
static const char fragment_src[] =
"#version 300 es\n"
"#extension GL_OES_EGL_image_external : require\n"
"precision highp float;\n"
"uniform samplerExternalOES texSampler;\n"
"in vec2 texCoord;\n"
"out vec4 fragColor;\n"
"void main() {\n"
" fragColor = texture2D(texSampler, texCoord);\n"
"}\n";
if (!create_program(vertex_src, fragment_src, &sr->texture_program)) {
return false;
}
glUseProgram(sr->texture_program);
GLint texture_uniform =
glGetUniformLocation(sr->texture_program, "texSampler");
glUniform1i(texture_uniform, 0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, quad_coords);
glEnableVertexAttribArray(0);
glGenTextures(1, &sr->texture);
glBindTexture(GL_TEXTURE_EXTERNAL_OES, sr->texture);
if (!eglStreamConsumerGLTextureExternalKHR(g->egl_display, stream)) {
fprintf(stderr, "Unable to connect GL as consumer\n");
return false;
}
// Set the acquire timeouts to infinite so it will block
// until new frames are available
// (so all streams will have the same frame acquired).
// When not synced,
// streams will be acquired and rendered as soon as they're available
// eglStreamAttribKHR(g->egl_display, stream,
// EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR, -1);
return true;
}
void destroy_stream_render(StreamRender* sr) {
glDeleteProgram(sr->texture_program);
glDeleteTextures(1, &sr->texture);
}
void render_stream_frame(StreamRender* sr, Graphics* g) {
if (!eglStreamConsumerAcquireKHR(g->egl_display, sr->stream)) {
fprintf(stderr, "Failed to acquire stream frame\n");
exit(EXIT_FAILURE);
}
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (!eglStreamConsumerReleaseKHR(g->egl_display, sr->stream)) {
fprintf(stderr, "Failed to release stream frame\n");
exit(EXIT_FAILURE);
}
}
bool init_texture_render(Graphics* g, TextureRender* tr) {
static const char vertex_src[] =
"#version 300 es\n"
"in layout(location = 0) vec2 coord;\n"
"out vec2 texCoord;\n"
"void main() {\n"
" gl_Position = vec4((coord * 2.0) - 1.0, 0.0, 1.0);\n"
// top-left origin need to be inverted for GL texture use.
" texCoord = vec2(coord.x, 1.0 - coord.y);\n"
"}\n";
static const char fragment_src[] =
"#version 300 es\n"
"precision highp float;\n"
"uniform sampler2D texSampler;\n"
"in vec2 texCoord;\n"
"out vec4 fragColor;\n"
"void main() {\n"
" fragColor = texture2D(texSampler, texCoord);\n"
"}\n";
if (!create_program(vertex_src, fragment_src, &tr->program)) {
return false;
}
GLint texture_uniform =
glGetUniformLocation(tr->program, "texSampler");
glUniform1i(texture_uniform, 0);
return true;
}
void destroy_texture_render(TextureRender* tr) {
glDeleteProgram(tr->program);
}
void render_texture(TextureRender* tr, GLuint texture, Graphics* g) {
glUseProgram(tr->program);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, quad_coords);
glEnableVertexAttribArray(0);
glBindTexture(GL_TEXTURE_2D, texture);
glEnable(GL_TEXTURE_2D);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
};
src/camera.hpp:
#pragma once
#include <Argus/Argus.h>
struct Graphics;
namespace camera {
struct Session {
Argus::CameraProvider* camera_provider;
Argus::CameraDevice* camera_device;
Argus::SensorMode* sensor_mode;
Argus::CaptureSession* capture_session;
// Argus::EventQueue* event_queue;
Argus::OutputStream* output_stream;
Argus::Request* request;
};
bool init(
uint32_t width, uint32_t height,
Graphics* graphics,
Session* session
);
void destroy(Session* session);
EGLStreamKHR get_egl_stream(Session* session);
bool begin_stream(Session* session, uint64_t fps);
};
src/camera.cpp:
#include "camera.hpp"
#include <cstdlib>
#include <cstdio>
#include <graphics.hpp>
using Argus::interface_cast;
using Argus::UniqueObj;
namespace camera {
bool init(
uint32_t width, uint32_t height,
Graphics* graphics,
Session* session
) {
session->camera_provider = Argus::CameraProvider::create();
auto* i_camera_provider =
interface_cast<Argus::ICameraProvider>(session->camera_provider);
if (!i_camera_provider) {
fprintf(stderr, "Failed to get camera provider\n");
return false;
}
printf("Argus version: %s\n", i_camera_provider->getVersion().c_str());
std::vector<Argus::CameraDevice*> camera_devices;
if (i_camera_provider->getCameraDevices(&camera_devices)
!= Argus::STATUS_OK)
{
fprintf(stderr, "Failed to get camera devices\n");
return false;
} else if (camera_devices.empty()) {
fprintf(stderr, "No camera devices available\n");
return false;
}
size_t device_index = 0;
session->camera_device = camera_devices[device_index];
auto* i_properties =
interface_cast<Argus::ICameraProperties>(session->camera_device);
if (!i_properties) {
fprintf(stderr, "Failed to get camera properties\n");
return false;
}
std::vector<Argus::SensorMode*> sensor_modes;
i_properties->getAllSensorModes(&sensor_modes);
size_t sensor_mode_index = 2;
session->sensor_mode = sensor_modes[sensor_mode_index];
session->capture_session = i_camera_provider->
createCaptureSession(session->camera_device);
auto* i_session =
interface_cast<Argus::ICaptureSession>(session->capture_session);
if (!i_session) {
fprintf(stderr, "Failed to get capture session\n");
return false;
}
UniqueObj<Argus::OutputStreamSettings> stream_settings
(i_session->createOutputStreamSettings());
auto* i_stream_settings =
interface_cast<Argus::IOutputStreamSettings>(stream_settings);
if (!i_stream_settings) {
fprintf(stderr, "Failed to create output stream settings\n");
return false;
}
auto format = Argus::PIXEL_FMT_YCbCr_420_888;
if (!i_stream_settings->supportsOutputStreamFormat(
session->sensor_mode, format
)) {
fprintf(stderr, "Output format not supported\n");
return false;
}
i_stream_settings->setPixelFormat(format);
// TODO? could use sensor mode resolution
Argus::Size2D<uint32_t> resolution(width, height);
i_stream_settings->setResolution(resolution);
i_stream_settings->setEGLDisplay(graphics->egl_display);
i_stream_settings->setMode(Argus::STREAM_MODE_MAILBOX);
// i_stream_settings->setMetadataEnable(true);
session->output_stream =
i_session->createOutputStream(stream_settings.get());
auto* i_stream = interface_cast<Argus::IStream>(session->output_stream);
if (!i_stream) {
fprintf(stderr, "Failed to create output stream\n");
return false;
}
session->request = NULL;
return true;
}
void destroy(Session* session) {
if (session->request) {
auto* i_session = interface_cast<Argus::ICaptureSession>(
session->capture_session);
i_session->stopRepeat();
i_session->waitForIdle();
session->request->destroy();
}
session->output_stream->destroy();
session->capture_session->destroy();
session->camera_provider->destroy();
}
EGLStreamKHR get_egl_stream(Session* session) {
return interface_cast<Argus::IStream>(session->output_stream)
->getEGLStream();
}
bool begin_stream(Session* session, uint64_t fps) {
auto* i_session =
interface_cast<Argus::ICaptureSession>(session->capture_session);
session->request =
i_session->createRequest(Argus::CAPTURE_INTENT_PREVIEW);
auto* i_request = interface_cast<Argus::IRequest>(session->request);
if (!i_request) {
fprintf(stderr, "Failed to create request\n");
return false;
}
auto* i_source_settings =
interface_cast<Argus::ISourceSettings>(i_request->getSourceSettings());
// i_source_settings->setExposureTimeRange
// i_source_settings->setFocusPosition
Argus::Range<uint64_t> fdr(1e9 / fps);
i_source_settings->setFrameDurationRange(fdr);
// i_source_settings->setGainRange
i_source_settings->setSensorMode(session->sensor_mode);
// i_source_settings->setOpticalBlack
// i_source_settings->setOpticalBlackEnable
// auto* i_stream_settings = interface_cast<Argus::IStreamSettings>(
// i_request->getStreamSettings(session->output_stream));
// i_stream_settings->setSourceClipRect
// i_stream_settings->setPostProcessingEnable(false);
// auto* i_auto_settings = interface_cast<Argus::IAutoControlSettings>(
// i_request->getAutoControlSettings());
if (i_request->enableOutputStream(session->output_stream)
!= Argus::STATUS_OK) {
fprintf(stderr, "Failed to enable output stream\n");
return false;
}
if (i_session->repeat(session->request) != Argus::STATUS_OK) {
fprintf(stderr, "Failed to start capture request\n");
return false;
}
return true;
}
};
src/cuda/core.hpp:
#pragma once
#include <cuda.h>
#include <cudaEGL.h>
#include <cuda_runtime.h>
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#include <cuda_gl_interop.h>
#include <cstdio>
#define STRINGIFY(x) #x
#define ESTRINGIFY(x) STRINGIFY(x)
#define CODE_LOCATION \
ESTRINGIFY(__FILE__) ":" \
ESTRINGIFY(__LINE__)
#define CUDA_CHECK(call) \
cuda::check("in " CODE_LOCATION " (" #call ")\n ->", call)
namespace cuda {
inline bool check(const char* desc, cudaError_t e) {
if (e == cudaSuccess) {
return true;
}
const char* es = cudaGetErrorString(e);
fprintf(stderr, "CUDA error: %s %s\n", desc, es);
return false;
}
struct EGLStreamConnection {
CUeglStreamConnection raw;
CUgraphicsResource resource;
};
bool init_egl_stream_connection(EGLStreamConnection* c, EGLStreamKHR stream);
void destroy_egl_stream_connection(EGLStreamConnection* c);
bool acquire_stream_frame(
EGLStreamConnection* c,
cudaArray_t* array,
cudaSurfaceObject_t* surface
);
bool release_stream_frame(
EGLStreamConnection* c, cudaSurfaceObject_t surface
);
struct GLTarget {
cudaGraphicsResource_t resource;
GLuint texture;
};
bool init_gl_target(int width, int height, GLint format, GLTarget* t);
void destroy_gl_target(GLTarget* t);
template<typename F>
bool map_gl_target(GLTarget* t, F f) {
if (!CUDA_CHECK(cudaGraphicsMapResources(1, &t->resource, 0))) {
return false;
}
cudaArray* array;
if (!CUDA_CHECK(cudaGraphicsSubResourceGetMappedArray(
&array, t->resource, 0, 0
))) {
return false;
}
bool result = f(array);
if (!CUDA_CHECK(cudaGraphicsUnmapResources(1, &t->resource, 0))) {
return false;
}
return result;
}
};
#define CU_CHECK(call) \
cu::check("in " CODE_LOCATION " (" #call ")\n ->", call)
namespace cu {
inline bool check(const char* desc, CUresult r) {
if (r == CUDA_SUCCESS) {
return true;
}
const char* es;
cuGetErrorString(r, &es);
fprintf(stderr, "CUDA error %s %s\n", desc, es);
return false;
}
};
src/cuda/core.cpp:
#include "core.hpp"
#include <cstring>
namespace cuda {
bool init_egl_stream_connection(EGLStreamConnection* c, EGLStreamKHR stream) {
if (!CU_CHECK(cuEGLStreamConsumerConnect(&c->raw, stream))) {
return false;
}
c->resource = 0;
return true;
}
void destroy_egl_stream_connection(EGLStreamConnection* c) {
CU_CHECK(cuEGLStreamConsumerDisconnect(&c->raw));
}
bool acquire_stream_frame(
EGLStreamConnection* c,
cudaArray_t* array,
cudaSurfaceObject_t* surface
) {
CUstream stream = 0;
unsigned int timeout = -1;
if (!CU_CHECK(cuEGLStreamConsumerAcquireFrame(
&c->raw, &c->resource, &stream, timeout
))) {
return false;
}
CUeglFrame frame;
unsigned int index = 0;
unsigned int mip_level = 0;
if (!CU_CHECK(cuGraphicsResourceGetMappedEglFrame(
&frame, c->resource, index, mip_level
))) {
return false;
}
if (frame.frameType != CU_EGL_FRAME_TYPE_ARRAY) {
fprintf(stderr, "Expected CUDA array from EGL frame\n");
return false;
}
if (frame.eglColorFormat != CU_EGL_COLOR_FORMAT_YUV420_PLANAR &&
frame.eglColorFormat != CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
fprintf(stderr, "Unexpected EGL frame color format\n");
return false;
}
if (frame.cuFormat != CU_AD_FORMAT_UNSIGNED_INT8) {
fprintf(stderr, "Unexpected EGL frame format\n");
return false;
}
// create a surface from the luminance plane
CUDA_RESOURCE_DESC resource_desc;
memset(&resource_desc, 0, sizeof(resource_desc));
resource_desc.resType = CU_RESOURCE_TYPE_ARRAY;
resource_desc.res.array.hArray = frame.frame.pArray[0];
*array = (cudaArray_t)(frame.frame.pArray[0]);
if (!CU_CHECK(cuSurfObjectCreate(surface, &resource_desc))) {
return false;
}
return true;
}
bool release_stream_frame(
EGLStreamConnection* c, cudaSurfaceObject_t surface
) {
if (!CU_CHECK(cuSurfObjectDestroy(surface))) {
return false;
}
CUstream stream = 0;
if (!CU_CHECK(cuEGLStreamConsumerReleaseFrame(
&c->raw, c->resource, &stream
))) {
return false;
}
c->resource = 0;
return true;
}
bool init_gl_target(int width, int height, GLint format, GLTarget* t) {
GLuint texture;
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
GLint mip_level = 0;
GLint border = 0;
GLenum type = GL_UNSIGNED_BYTE;
void* data = NULL;
glTexImage2D(GL_TEXTURE_2D,
mip_level,
format,
width, height, border,
format, type,
data);
cudaGraphicsResource_t resource;
if (!CUDA_CHECK(cudaGraphicsGLRegisterImage(
&resource, texture, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsWriteDiscard
// cudaGraphicsRegisterFlagsSurfaceLoadStore
))) {
return false;
}
t->texture = texture;
t->resource = resource;
return true;
}
void destroy_gl_target(GLTarget* t) {
CUDA_CHECK(cudaGraphicsUnregisterResource(t->resource));
glDeleteTextures(1, &t->texture);
}
};
src/bin/interop.cpp:
#include <window.hpp>
#include <graphics.hpp>
#include <camera.hpp>
#include <cuda/core.hpp>
int main(int argc, char** argv) {
int exit_code = EXIT_FAILURE;
const size_t width = 1280;
const size_t height = 720;
auto* window = window::create(width, height, "Interoperability");
if (window) {
Graphics graphics;
if (graphics::init(window, &graphics)) {
cudaSetDevice(0);
cudaDeviceReset();
camera::Session session;
if (camera::init(width, height, &graphics, &session)) {
auto egl_stream = camera::get_egl_stream(&session);
cuda::EGLStreamConnection connection;
cuda::init_egl_stream_connection(&connection, egl_stream);
uint64_t fps = 30;
if (camera::begin_stream(&session, fps)) {
cuda::GLTarget gl_target;
cuda::init_gl_target(width, height, GL_LUMINANCE, &gl_target);
graphics::TextureRender render;
graphics::init_texture_render(&graphics, &render);
bool new_frame = false;
while (graphics::stream_query_new_frame(
egl_stream, &graphics, &new_frame) && !new_frame);
cudaArray_t intensity_array;
cudaSurfaceObject_t intensity;
cuda::acquire_stream_frame(&connection, &intensity_array, &intensity);
while (!window::should_close(window)) {
window::poll_events(window);
/* crashing with this
if (!cuda::map_gl_target(&gl_target, [&](auto array) {
return CUDA_CHECK(cudaMemcpy2DArrayToArray(
array, 0, 0,
intensity_array, 0, 0,
width * sizeof(uint8_t), height,
cudaMemcpyDeviceToDevice
));
})) {
break;
}
*/
graphics::render_texture(&render, gl_target.texture, &graphics);
graphics::swap_buffers(&graphics);
}
cuda::release_stream_frame(&connection, intensity);
graphics::destroy_texture_render(&render);
cuda::destroy_gl_target(&gl_target);
exit_code = EXIT_SUCCESS;
}
cuda::destroy_egl_stream_connection(&connection);
camera::destroy(&session);
}
graphics::destroy(&graphics);
}
window::destroy(window);
}
return exit_code;
}
build.ninja:
ninja_required_version = 1.0
builddir = build
includes = $
-I /home/nvidia/tegra_multimedia_api/argus/include $
-I /usr/local/cuda/include $
-I src -I build
cflags = -Ofast -march=native -std=c++14 -Wall ${includes}
rule ccpp
description = Compiling C++ to ${out}
depfile = ${out}.d
command = g++ -MMD -MF ${depfile} -MT ${out} $
${cflags} -c ${in} -o ${out}
deps = gcc
cuda_archflags = -m64 -gencode arch=compute_62,code=sm_62
nvflags = -O3 -std=c++14 ${includes} ${cuda_archflags}
rule ccu
description = Compiling CUDA to ${out}
depfile = ${out}.d
command = nvcc ${nvflags} -M -MT ${out} $
${in} -o ${depfile} && $
nvcc ${nvflags} -c ${in} -o ${out}
deps = gcc
lflags = $
-lX11 $
-lcuda $
-lcudart $
-lEGL $
-lGLESv2 $
-largus $
-L /usr/lib/aarch64-linux-gnu/tegra $
-L /usr/local/cuda/lib64 $
rule link
description = Linking ${out}
command = g++ ${cflags} ${in} -o ${out} ${lflags}
build ${builddir}/window.o: ccpp src/window_x11.cpp
build ${builddir}/graphics.o: ccpp src/graphics.cpp
build ${builddir}/camera.o: ccpp src/camera.cpp
build ${builddir}/cuda/core.o: ccpp src/cuda/core.cpp
build ${builddir}/bin/interop: link src/bin/interop.cpp $
${builddir}/window.o $
${builddir}/graphics.o $
${builddir}/camera.o $
${builddir}/cuda/core.o $
run (./run interop):
#!/usr/bin/env bash
set -o errexit
set -o pipefail
[ -z $1 ] && (echo "Expected binary name"; exit 1)
ninja build/bin/$1
__GL_SYNC_TO_VBLANK=0 ./build/bin/$1
Thanks for looking into it.