Change display size when using 4K cameras

I’ve managed to get detectNet working with a 4K camera, but now I need to get the display size reduced so that I can see the whole video image rather than just a small corner of it.

I’m thinking the settings are in detectnet-camera.cpp, maybe within here:

// update display
		if( display != NULL )
		{
			display->UserEvents();
			display->BeginRender();

			if( texture != NULL )
			{
				// rescale image pixel intensities for display
				CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), 
								   (float4*)imgRGBA, make_float2(0.0f, 1.0f), 
		 						   camera->GetWidth(), camera->GetHeight()));

				// map from CUDA to openGL using GL interop
				void* tex_map = texture->MapCUDA();

				if( tex_map != NULL )
				{
					cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice);
					texture->Unmap();
				}

				// draw the texture
				texture->Render(100,100);		
			}

			display->EndRender();
		}

I tried changing a few variables in the above, but saw no change. Any ideas, anybody?

Thanks!
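(For context: in the stock detectnet-camera.cpp the on-screen size is driven by the glTexture, which is created earlier in main() at the full camera resolution, so tweaking values inside this render block alone would not be expected to shrink the picture. Roughly, the relevant line near the top of main() is:)

// the texture is sized to the camera, so a 4096x2160 camera gives a
// 4096x2160 on-screen image regardless of what the render block does
texture = glTexture::Create(camera->GetWidth(), camera->GetHeight(), GL_RGBA32F_ARB/*GL_RGBA8*/);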

Could you share the implementation of “display->BeginRender()” in your code?

Thanks for the reply, @WayneWWW.

I’m currently using the standard jetson-inference files such as glDisplay.cpp, which has BeginRender() below:

/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
 
#include "glDisplay.h"


// Constructor
glDisplay::glDisplay()
{
	mWindowX    = 0;
	mScreenX    = NULL;
	mVisualX    = NULL;
	mContextGL  = NULL;
	mDisplayX   = NULL;
	mWidth      = 0;
	mHeight     = 0;
	mAvgTime    = 1.0f;

	mBgColor[0] = 0.0f;
	mBgColor[1] = 0.0f;
	mBgColor[2] = 0.0f;
	mBgColor[3] = 1.0f;

	clock_gettime(CLOCK_REALTIME, &mLastTime);
}


// Destructor
glDisplay::~glDisplay()
{
	glXDestroyContext(mDisplayX, mContextGL);
}


// Create
glDisplay* glDisplay::Create( const char* title, float r, float g, float b, float a )
{
	glDisplay* vp = new glDisplay();
	
	if( !vp )
		return NULL;
		
	if( !vp->initWindow() )
	{
		printf("[OpenGL]  failed to create X11 Window.\n");
		delete vp;
		return NULL;
	}
	
	if( !vp->initGL() )
	{
		printf("[OpenGL]  failed to initialize OpenGL.\n");
		delete vp;
		return NULL;
	}
	
	GLenum err = glewInit();
	
	if (GLEW_OK != err)
	{
		printf("[OpenGL]  GLEW Error: %s\n", glewGetErrorString(err));
		delete vp;
		return NULL;
	}

	if( title != NULL )
		vp->SetTitle(title);

	vp->SetBackgroundColor(r, g, b, a);

	printf("[OpenGL]  glDisplay display window initialized\n");
	return vp;
}


#define DEFAULT_TITLE "NVIDIA Jetson | JetPack-L4T"


// Create
glDisplay* glDisplay::Create( float r, float g, float b, float a )
{
	return Create(DEFAULT_TITLE, r, g, b, a);
}


// initWindow
bool glDisplay::initWindow()
{
	if( !mDisplayX )
		mDisplayX = XOpenDisplay(0);

	if( !mDisplayX )
	{
		printf( "[OpenGL]  failed to open X11 server connection." );
		return false;
	}

		
	if( !mDisplayX )
	{
		printf( "InitWindow() - no X11 server connection." );
		return false;
	}

	// retrieve screen info
	const int screenIdx   = DefaultScreen(mDisplayX);
	const int screenWidth = DisplayWidth(mDisplayX, screenIdx);
	const int screenHeight = DisplayHeight(mDisplayX, screenIdx);
	
	printf("default X screen %i:   %i x %i\n", screenIdx, screenWidth, screenHeight);
	
	Screen* screen = XScreenOfDisplay(mDisplayX, screenIdx);

	if( !screen )
	{
		printf("failed to retrieve default Screen instance\n");
		return false;
	}
	
	Window winRoot = XRootWindowOfScreen(screen);

	// get framebuffer format
	static int fbAttribs[] =
	{
			GLX_X_RENDERABLE, True,
			GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT,
			GLX_RENDER_TYPE, GLX_RGBA_BIT,
			GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR,
			GLX_RED_SIZE, 8,
			GLX_GREEN_SIZE, 8,
			GLX_BLUE_SIZE, 8,
			GLX_ALPHA_SIZE, 8,
			GLX_DEPTH_SIZE, 24,
			GLX_STENCIL_SIZE, 8,
			GLX_DOUBLEBUFFER, True,
			GLX_SAMPLE_BUFFERS, 0,
			GLX_SAMPLES, 0,
			None
	};

	int fbCount = 0;
	GLXFBConfig* fbConfig = glXChooseFBConfig(mDisplayX, screenIdx, fbAttribs, &fbCount);

	if( !fbConfig || fbCount == 0 )
		return false;

	// get a 'visual'
	XVisualInfo* visual = glXGetVisualFromFBConfig(mDisplayX, fbConfig[0]);

	if( !visual )
		return false;

	// populate windows attributes
	XSetWindowAttributes winAttr;
	winAttr.colormap = XCreateColormap(mDisplayX, winRoot, visual->visual, AllocNone);
	winAttr.background_pixmap = None;
	winAttr.border_pixel = 0;
	winAttr.event_mask = StructureNotifyMask|KeyPressMask|KeyReleaseMask|PointerMotionMask|ButtonPressMask|ButtonReleaseMask;

	
	// create window
	Window win = XCreateWindow(mDisplayX, winRoot, 0, 0, screenWidth, screenHeight, 0,
							   visual->depth, InputOutput, visual->visual, CWBorderPixel|CWColormap|CWEventMask, &winAttr);

	if( !win )
		return false;

	XStoreName(mDisplayX, win, DEFAULT_TITLE);
	XMapWindow(mDisplayX, win);

	// cleanup
	mWindowX = win;
	mScreenX = screen;
	mVisualX = visual;
	mWidth   = screenWidth;
	mHeight  = screenHeight;
	
	XFree(fbConfig);
	return true;
}


void glDisplay::SetTitle( const char* str )
{
	XStoreName(mDisplayX, mWindowX, str);
}

// initGL
bool glDisplay::initGL()
{
	mContextGL = glXCreateContext(mDisplayX, mVisualX, 0, True);

	if( !mContextGL )
		return false;

	GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL));

	return true;
}


// MakeCurrent
void glDisplay::BeginRender()
{
	GL(glXMakeCurrent(mDisplayX, mWindowX, mContextGL));

	GL(glClearColor(mBgColor[0], mBgColor[1], mBgColor[2], mBgColor[3]));
	GL(glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT|GL_STENCIL_BUFFER_BIT));

	GL(glViewport(0, 0, mWidth, mHeight));
	GL(glMatrixMode(GL_PROJECTION));
	GL(glLoadIdentity());
	GL(glOrtho(0.0f, mWidth, mHeight, 0.0f, 0.0f, 1.0f));
}


// timeDiff
static timespec timeDiff( const timespec& start, const timespec& end)
{
	timespec temp;
	if ((end.tv_nsec-start.tv_nsec)<0) {
		temp.tv_sec = end.tv_sec-start.tv_sec-1;
		temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
	} else {
		temp.tv_sec = end.tv_sec-start.tv_sec;
		temp.tv_nsec = end.tv_nsec-start.tv_nsec;
	}
	return temp;
}


// Refresh
void glDisplay::EndRender()
{
	glXSwapBuffers(mDisplayX, mWindowX);

	// measure framerate
	timespec currTime;
	clock_gettime(CLOCK_REALTIME, &currTime);

	const timespec diffTime = timeDiff(mLastTime, currTime);
	const float ns = 1000000000 * diffTime.tv_sec + diffTime.tv_nsec;

	mAvgTime  = mAvgTime * 0.8f + ns * 0.2f;
	mLastTime = currTime;
}


#define MOUSE_MOVE		0
#define MOUSE_BUTTON	1
#define MOUSE_WHEEL		2
#define MOUSE_DOUBLE	3
#define KEY_STATE		4
#define KEY_CHAR		5


// OnEvent
void glDisplay::onEvent( uint msg, int a, int b )
{
	switch(msg)
	{
		case MOUSE_MOVE:
		{
			//mMousePos.Set(a,b);
			break;
		}
		case MOUSE_BUTTON:
		{
			/*if( mMouseButton[a] != (bool)b )
			{
				mMouseButton[a] = b;

				if( b )
					mMouseDownEvent = true;

				// ignore right-mouse up events
				if( !(a == 1 && !b) )
					mMouseEvent = true;
			}*/

			break;
		}
		case MOUSE_DOUBLE:
		{
			/*mMouseDblClick = b;

			if( b )
			{
				mMouseEvent = true;
				mMouseDownEvent = true;
			}*/

			break;
		}
		case MOUSE_WHEEL:
		{
			//mMouseWheel = a;
			break;
		}
		case KEY_STATE:
		{
			//mKeys[a] = b;
			break;
		}
		case KEY_CHAR:
		{
			//mKeyText = a;
			break;
		}
	}

	//if( msg == MOUSE_MOVE || msg == MOUSE_BUTTON || msg == MOUSE_DOUBLE || msg == MOUSE_WHEEL )
	//	mMouseEventLast = time();
}


// UserEvents()
void glDisplay::UserEvents()
{
	// reset input states
	/*mMouseEvent     = false;
	mMouseDownEvent = false;
	mMouseDblClick  = false;
	mMouseWheel     = 0;
	mKeyText		= 0;*/


	XEvent evt;

	while( XEventsQueued(mDisplayX, QueuedAlready) > 0 )
	{
		XNextEvent(mDisplayX, &evt);

		switch( evt.type )
		{
			case KeyPress:	     onEvent(KEY_STATE, evt.xkey.keycode, 1);		break;
			case KeyRelease:     onEvent(KEY_STATE, evt.xkey.keycode, 0);		break;
			case ButtonPress:	 onEvent(MOUSE_BUTTON, evt.xbutton.button, 1); 	break;
			case ButtonRelease:  onEvent(MOUSE_BUTTON, evt.xbutton.button, 0);	break;
			case MotionNotify:
			{
				XWindowAttributes attr;
				XGetWindowAttributes(mDisplayX, evt.xmotion.root, &attr);
				onEvent(MOUSE_MOVE, evt.xmotion.x_root + attr.x, evt.xmotion.y_root + attr.y);
				break;
			}
		}
	}
}

I tried messing about with some of the settings, e.g. mWidth, but was not able to reduce the pixel density at all, just the same density in smaller frames.

I am a little bit confused about your current status. Could you take a photo so that I can make sure what the problem is?

It sounds like your input buffer is larger than the display buffer. What you should do is downsample the camera output, not change the display output.
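A rough sketch of what that could look like with the CUDA helpers that already ship with jetson-inference: keep imgRGBA at the full camera resolution for detectNet, and resize each frame into a separate, display-sized buffer just before rendering. The dispCUDA buffer and the 1280 x 720 target below are illustrative, not part of the stock code:

#include "cudaResize.h"        // cudaResizeRGBA()
#include "cudaMappedMemory.h"  // cudaAllocMapped()

// hypothetical display-sized buffer, allocated once before the main loop
const int dispWidth  = 1280;
const int dispHeight = 720;

void* dispCPU  = NULL;
void* dispCUDA = NULL;

if( !cudaAllocMapped(&dispCPU, &dispCUDA, dispWidth * dispHeight * sizeof(float4)) )
	printf("failed to allocate display buffer\n");

// ... inside the capture loop, after detectNet has run on the full-size imgRGBA:
// downscale the 4K frame into the display buffer (a separate output buffer
// avoids reading and writing the same memory within one kernel launch)
CUDA(cudaResizeRGBA((float4*)imgRGBA, camera->GetWidth(), camera->GetHeight(),
                    (float4*)dispCUDA, dispWidth, dispHeight));

// dispCUDA (at 1280x720) would then be normalized and copied into a glTexture
// created at 1280x720 instead of the camera resolution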

@WayneWWW
The screenshot below shows the resolution set to 1280 x 720. The video image fits into the display without any clipping at all:

The next screenshot shows the resolution changed to 4096 x 2160. The video image no longer fits into the display without clipping:

What I need is the high resolution, but with the video displayed without clipping.

Then the problem is whether your display is able to support 4K resolution. If it is not, then downsampling is needed.

@WayneWWW I could upgrade the monitor to 3840 x 2160, but it would still clip about 250 pixels on the horizontal axis.

Any idea how to downsample without affecting the capture resolution?

Ideally, the ‘number crunching’ for object detection should be done at 4096 x 2160 and the more trivial ‘display’ should be done at 1280 x 720. Am I asking too much of the software? Maybe I should just accept a small compromise?

Thanks!

I don’t understand. Could you dump the result of the line below?

printf("default X screen %i:   %i x %i\n", screenIdx, screenWidth, screenHeight);

Could you confirm what is the input buffer size from camera?

Yes, it’s in the printout from the terminal below (line 51):

default X screen 0:   1920 x 1080

Seems like it’s getting overwritten somewhere.

detectnet-camera
  args (3):  0 [./detectnet-camera]  1 [--prototxt=networks/DetectNet-COCO-Dog/deploy.prototxt]  2 [--model=networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel]  

[gstreamer] initialized gstreamer, version 1.8.3.0
[gstreamer] gstCamera attempting to initialize with GST_SOURCE_NVCAMERA
[gstreamer] gstCamera pipeline string:
v4l2src device=/dev/video0 ! video/x-raw, width=(int)4096, height=(int)2160, format=RGB ! videoconvert ! video/x-raw, format=RGB ! videoconvert !appsink name=mysink
[gstreamer] gstCamera successfully initialized with GST_SOURCE_V4L2

detectnet-camera:  successfully initialized video device
    width:  4096
   height:  2160
    depth:  24 (bpp)

detectNet -- loading detection network model from:
          -- prototxt     networks/DetectNet-COCO-Dog/deploy.prototxt
          -- model        networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel
          -- input_blob   'data'
          -- output_cvg   'coverage'
          -- output_bbox  'bboxes'
          -- mean_pixel   0.000000
          -- class_labels NULL
          -- THRESHOLD    0.500000
          -- batch_size   2

[TRT]  TensorRT version 4.0.2
[TRT]  desired precision specified for GPU: FASTEST
[TRT]  requested fasted precision for device GPU without providing valid calibrator, disabling INT8
[TRT]  native precisions detected for GPU:  FP32, FP16
[TRT]  selecting fastest native precision for GPU:  FP16
[TRT]  attempting to open engine cache file networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel.2.1.GPU.FP16.engine
[TRT]  loading network profile from engine cache... networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel.2.1.GPU.FP16.engine
[TRT]  device GPU, networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel loaded
[TRT]  device GPU, CUDA engine context initialized with 3 bindings
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel input  binding index:  0
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel input  dims (b=2 c=3 h=640 w=640) size=9830400
[cuda]  cudaAllocMapped 9830400 bytes, CPU 0x101540000 GPU 0x101540000
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel output 0 coverage  binding index:  1
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel output 0 coverage  dims (b=2 c=1 h=40 w=40) size=12800
[cuda]  cudaAllocMapped 12800 bytes, CPU 0x101ea0000 GPU 0x101ea0000
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel output 1 bboxes  binding index:  2
[TRT]  networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel output 1 bboxes  dims (b=2 c=4 h=40 w=40) size=51200
[cuda]  cudaAllocMapped 51200 bytes, CPU 0x1020a0000 GPU 0x1020a0000
device GPU, networks/DetectNet-COCO-Dog/snapshot_iter_38600.caffemodel initialized.
[cuda]  cudaAllocMapped 16 bytes, CPU 0x101340200 GPU 0x101340200
detectNet -- model has 1 object classes
detectNet -- maximum bounding boxes:  6400
[cuda]  cudaAllocMapped 102400 bytes, CPU 0x1022a0000 GPU 0x1022a0000
[cuda]  cudaAllocMapped 25600 bytes, CPU 0x1020ac800 GPU 0x1020ac800
default X screen 0:   1920 x 1080
[OpenGL]  glDisplay display window initialized
[OpenGL]   creating 4096x2160 texture
loaded image  fontmapA.png  (256 x 512)  2097152 bytes
[cuda]  cudaAllocMapped 2097152 bytes, CPU 0x1024a0000 GPU 0x1024a0000
[cuda]  cudaAllocMapped 8192 bytes, CPU 0x101ea3200 GPU 0x101ea3200
[gstreamer] gstreamer transitioning pipeline to GST_STATE_PLAYING
[gstreamer] gstreamer changed state from NULL to READY ==> mysink
[gstreamer] gstreamer changed state from NULL to READY ==> videoconvert1
[gstreamer] gstreamer changed state from NULL to READY ==> capsfilter1
[gstreamer] gstreamer changed state from NULL to READY ==> videoconvert0
[gstreamer] gstreamer changed state from NULL to READY ==> capsfilter0
[gstreamer] gstreamer changed state from NULL to READY ==> v4l2src0
[gstreamer] gstreamer changed state from NULL to READY ==> pipeline0
[gstreamer] gstreamer changed state from READY to PAUSED ==> videoconvert1
[gstreamer] gstreamer changed state from READY to PAUSED ==> capsfilter1
[gstreamer] gstreamer changed state from READY to PAUSED ==> videoconvert0
[gstreamer] gstreamer changed state from READY to PAUSED ==> capsfilter0
[gstreamer] gstreamer stream status CREATE ==> src
[gstreamer] gstreamer changed state from READY to PAUSED ==> v4l2src0
[gstreamer] gstreamer changed state from READY to PAUSED ==> pipeline0
[gstreamer] gstreamer msg new-clock ==> pipeline0
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> videoconvert1
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> capsfilter1
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> videoconvert0
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> capsfilter0
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> v4l2src0
[gstreamer] gstreamer stream status ENTER ==> src
[gstreamer] gstreamer msg stream-start ==> pipeline0

detectnet-camera:  camera open for streaming

detectnet-camera:  failed to capture frame
detectnet-camera:  failed to convert from NV12 to RGBA
detectNet::Detect( 0x(nil), 4096, 2160 ) -> invalid parameters
[cuda]   cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f), (float4*)imgRGBA, make_float2(0.0f, 1.0f), camera->GetWidth(), camera->GetHeight())
[cuda]      invalid device pointer (error 17) (hex 0x11)
[cuda]      /home/nvidia/jetson-inference/detectnet-camera/detectnet-camera.cpp:247
[cuda]   registered 141557760 byte openGL texture for interop access (4096x2160)
[gstreamer] gstCamera onPreroll
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x10adb0000 GPU 0x10adb0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x10c700000 GPU 0x10c700000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x10e050000 GPU 0x10e050000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x10f9a0000 GPU 0x10f9a0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x1112f0000 GPU 0x1112f0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x112c40000 GPU 0x112c40000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x114590000 GPU 0x114590000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x115ee0000 GPU 0x115ee0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x117830000 GPU 0x117830000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x119180000 GPU 0x119180000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x11aad0000 GPU 0x11aad0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x11c420000 GPU 0x11c420000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x11dd70000 GPU 0x11dd70000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x11f6c0000 GPU 0x11f6c0000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x121010000 GPU 0x121010000
[cuda]  cudaAllocMapped 26542080 bytes, CPU 0x122960000 GPU 0x122960000
[cuda]   gstreamer camera -- allocated 16 ringbuffers, 26542080 bytes each
[gstreamer] gstreamer changed state from READY to PAUSED ==> mysink
[gstreamer] gstreamer msg async-done ==> pipeline0
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> mysink
[gstreamer] gstreamer changed state from PAUSED to PLAYING ==> pipeline0
[cuda]   gstreamer camera -- allocated 16 RGBA ringbuffers
[gstreamer] gstreamer msg qos ==> v4l2src0
0 bounding BOXES detected
[gstreamer] gstreamer msg qos ==> v4l2src0
0 bounding BOXES detected
[gstreamer] gstreamer msg qos ==> v4l2src0
0 bounding BOXES detected
[gstreamer] gstreamer msg qos ==> v4l2src0
0 bounding BOXES detected
[gstreamer] gstreamer msg qos ==> v4l2src0

Then if you change the monitor resolution to 3840x2160, there should still be some clipping, but very small, right? Do you have to use 4K camera resolution in your use case? To me, the current situation is simple: just change the camera resolution (please be aware that I am talking about the “camera” resolution, not the “display” resolution) or try to downscale the glTexture…

The camera allows a setting of 3840 x 2160, so if I get a new display screen of the same size, all will be good. I’ll still lose 250 pixels on one side, but there won’t be clipping.

Whilst I try to find a cheap one on eBay, I’ll continue to explore glTexture.cpp etc.

Thanks!

This is a couple of months old, but I had a similar problem. The display is created based on your screen size, but the texture takes the size of the camera.

CUDA(cudaResizeRGBA((float4*)imgRGBA, camera->GetWidth(), camera->GetHeight(), 
	(float4*)imgRGBA, texture->GetWidth(), texture->GetHeight()));

Place the above before the “CUDA(cudaNormalizeRGBA(...))” call in the draw section at the bottom of the main loop.

In the section near the top where the code creates the display and texture, either set your texture size to a custom value or divide it by an amount that brings it within the size of your display. I divided the camera size by 2 for my needs.

texture = glTexture::Create(camera->GetWidth()/2, camera->GetHeight()/2, GL_RGBA32F_ARB/*GL_RGBA8*/);

No reason to buy a larger screen. You still get object detection and box drawing at 4K resolution, and then the display image is resized to whatever fits your screen.
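Putting the two changes together against the render section quoted at the top of the thread, the modified parts of detectnet-camera.cpp would look roughly like this (a sketch; the only additions to the stock code are the cudaResize.h include, the cudaResizeRGBA() call, and the /2 on the texture size):

#include "cudaResize.h"   // for cudaResizeRGBA()

// near the top of main(), where the display and texture are created:
texture = glTexture::Create(camera->GetWidth()/2, camera->GetHeight()/2, GL_RGBA32F_ARB/*GL_RGBA8*/);

// ... in the draw section at the bottom of the main loop:
if( texture != NULL )
{
	// downscale the full-resolution camera image to the (smaller) texture size
	CUDA(cudaResizeRGBA((float4*)imgRGBA, camera->GetWidth(), camera->GetHeight(),
	                    (float4*)imgRGBA, texture->GetWidth(), texture->GetHeight()));

	// rescale image pixel intensities for display (only the first
	// texture->GetSize() bytes get copied into the texture below, so leaving
	// the camera dimensions here just means some extra, harmless work)
	CUDA(cudaNormalizeRGBA((float4*)imgRGBA, make_float2(0.0f, 255.0f),
	                       (float4*)imgRGBA, make_float2(0.0f, 1.0f),
	                       camera->GetWidth(), camera->GetHeight()));

	// map from CUDA to openGL using GL interop
	void* tex_map = texture->MapCUDA();

	if( tex_map != NULL )
	{
		cudaMemcpy(tex_map, imgRGBA, texture->GetSize(), cudaMemcpyDeviceToDevice);
		texture->Unmap();
	}

	// draw the (now half-size) texture
	texture->Render(100,100);
}

Detection still runs on the full 4096 x 2160 imgRGBA earlier in the loop; only the copy that ends up in the on-screen texture is downscaled.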

@fox689 This worked a treat … Thanks!

Just had to add: #include “cudaResize.h”

Now have the Logitech 4K camera working on the Jetson TX2 at 4096 x 2160 pixels at 2.8 FPS.
Might be time to swap over to the Xavier.