Hello,
I want to capture the screen pixels of a QML Qt Quick Controls application on the NVIDIA Xavier NX platform using native OpenGL functions, since I will need the same functionality on Android as well. I have found that I can implement asynchronous readback using the glReadPixels function. My requirement is to get 16-bit RGB color pixels. For this reason I have written two different algorithms, both of which cause high CPU usage. The implementations and the details for CPU usage, read time and process time are as follows:
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Scenario 1:
void WaylandEgl::createPixelBO()
{
if (!buffCreated)
{
pbo_size = mWinHeight * mWinWidth *2;
pixels = new unsigned char[pbo_size];
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glGenBuffers(PBO_COUNT,pboIds);
glBindBuffer(GL_PIXEL_PACK_BUFFER,pboIds[0]);
glBufferData(GL_PIXEL_PACK_BUFFER, pbo_size, 0, GL_STREAM_READ);
glBindBuffer(GL_PIXEL_PACK_BUFFER,pboIds[1]);
glBufferData(GL_PIXEL_PACK_BUFFER, pbo_size, 0, GL_STREAM_READ);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
buffCreated = true;
glInfo glInfo;
glInfo.getInfo();
glInfo.printSelf();
if(glInfo.isExtensionSupported("GL_ARB_pixel_buffer_object"))
{
qDebug() << "Video card supports GL_ARB_pixel_buffer_object.";
pboSupported = true;
}
else
{
qDebug() << "Video card does NOT support GL_ARB_pixel_buffer_object.";
pboSupported = false;
return;
}
}
}
void WaylandEgl::runPixelBO()
{
static int index = 0;
int nextIndex = 0; // pbo index used for next frame
index = (index + 1) % 2; // index 1
nextIndex = (index + 1) % 2; // nexIndex 0
createPixelBO();
memset(pixels,0,pbo_size);
glReadBuffer(GL_FRONT);
if (pboSupported)
{
t1.start();
glBindBuffer(GL_PIXEL_PACK_BUFFER, pboIds[index]);
glReadPixels(0, 0, mWinWidth, mWinHeight, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 0);
t1.stop();
readTime = t1.getElapsedTimeInMilliSec();
t1.start();
glBindBuffer(GL_PIXEL_PACK_BUFFER, pboIds[nextIndex]);
GLubyte *ptr = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT);
if (ptr)
{
memcpy(pixels, ptr, pbo_size);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
}
else
{
qDebug() << "NULL ptr";
}
t1.stop();
processTime = t1.getElapsedTimeInMilliSec();
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
else
{
t1.start();
glReadPixels(0, 0, mWinWidth, mWinHeight, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, pixels);
// measure the time reading framebuffer
t1.stop();
readTime = t1.getElapsedTimeInMilliSec();
t1.start();
// measure the time reading framebuffer
t1.stop();
processTime = t1.getElapsedTimeInMilliSec();
}
qDebug() << "Read Time " << readTime;
qDebug() << "Process Time " << processTime;
}
Results:
PBO ON:
CPU: 28-32%
Read Time 5.065 ms
Process Time 0.334 ms
PBO OFF:
CPU:24-27%
Read Time 5.26 ms
Process Time 0 ms
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Scenario 2:
void WaylandEgl::initFastBuffers()
{
if (!buffCreated)
{
pbo_size = mWinHeight * mWinWidth *2;
pixels = new unsigned char[pbo_size];
Readback_buf = (GLchar *) malloc( pbo_size );
glGenBuffers( PBO_COUNT, pboIds );
// Buffer #0: glReadPixels target
GLenum target = GL_PIXEL_PACK_BUFFER;
glBindBuffer( target, pboIds[0] );
glBufferData( target, pbo_size, 0, GL_STATIC_COPY );
glGetBufferParameterui64vNV = (PFNGLGETBUFFERPARAMETERUI64VNVPROC)eglGetProcAddress("glGetBufferParameterui64vNV");
if (!glGetBufferParameterui64vNV)
{
qDebug() << "glGetBufferParameterui64vNV not fouynded!";
return;
}
glMakeBufferResidentNV = (PFNGLMAKEBUFFERRESIDENTNVPROC)eglGetProcAddress("glMakeBufferResidentNV");
if (!glMakeBufferResidentNV)
{
qDebug() << "glMakeBufferResidentNV not fouynded!";
return;
}
glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)eglGetProcAddress("glUnmapBufferARB");
if (!glUnmapBufferARB)
{
qDebug() << "glUnmapBufferARB not fouynded!";
return;
}
glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)eglGetProcAddress("glGetBufferSubData");
if (!glGetBufferSubData)
{
qDebug() << "glGetBufferSubData not fouynded!";
return;
}
qDebug() << "Run the optimizatiosn";
GLuint64EXT addr;
glGetBufferParameterui64vNV( target, GL_BUFFER_GPU_ADDRESS_NV, &addr );
glMakeBufferResidentNV( target, GL_READ_ONLY );
// Buffer #1: glCopyBuffer target
target = GL_COPY_WRITE_BUFFER;
glBindBuffer( target, pboIds[1] );
glBufferData( target, pbo_size, 0, GL_STREAM_READ );
glMapBufferRange( target, 0, 1, GL_MAP_WRITE_BIT);
glUnmapBufferARB( target );
glGetBufferParameterui64vNV( target, GL_BUFFER_GPU_ADDRESS_NV, &addr );
glMakeBufferResidentNV ( target, GL_READ_ONLY );
buffCreated = true;
glPixelStorei( GL_PACK_ALIGNMENT, 1 );
}
}
void WaylandEgl::doReadbackFAST()
{
// Work-around for NVidia driver readback crippling on GeForce.
initFastBuffers();
//glFinish();
Timer t1;
t1.start();
// Do a depth readback to BUF OBJ #0
glBindBuffer( GL_PIXEL_PACK_BUFFER, pboIds[0] );
glReadPixels( 0, 0, mWinWidth, mWinHeight,
GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 0 );
t1.stop();
readTime = t1.getElapsedTimeInMilliSec();
t1.start();
// Copy from BUF OBJ #0 to BUF OBJ #1
glBindBuffer( GL_COPY_WRITE_BUFFER, pboIds[1] );
glCopyBufferSubData( GL_PIXEL_PACK_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
pbo_size );
// Do the readback from BUF OBJ #1 to app CPU memory
glGetBufferSubData( GL_COPY_WRITE_BUFFER, 0, pbo_size,
Readback_buf );
//sendImage((unsigned char*)Readback_buf,pbo_size);
t1.stop();
processTime = t1.getElapsedTimeInMilliSec();
glBindBuffer( GL_PIXEL_PACK_BUFFER, 0 );
qDebug() << "Read Time " << readTime;
qDebug() << "Process Time " << processTime;
}
Results:
PBO ON:
CPU: 28-33%
Read Time 3.446 ms
Process Time 2.111 ms
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
The strange point is that when I call glReadPixels as a blocking call (without a PBO), it uses less CPU for the 16-bit RGB format.
For RGBA and RGB, both algorithms work fine, and the Scenario 2 algorithm uses less CPU. I need to get 16-bit RGB color pixels and want to keep CPU usage as low as possible. Could you please check my algorithms and/or suggest a way to achieve low CPU consumption when reading back 16-bit color pixels?
Regards