Some questions about tegra_multimedia_api

Hi, I have an industrial camera that outputs YUV (RGB) images at 30 fps, and I want to encode the YUV in real time.

Here are some questions:

  1. What is the difference between 03_video_cuda_enc and 01_video_encode? Are both of these samples hardware-encode methods (using the GPU)?

  2. Sample 01_video_encode encodes from an input file stream to an output file stream, but the YUV from my camera arrives frame by frame. Is this sample useful for my purpose of encoding the YUV in real time, or should I use the frontend sample instead?

  3. I can capture YUV images frame by frame with the camera's SDK. How can I turn these frames into an image stream that sample 01_video_encode can use?

Thank you very much!

We have developed H.264 streaming software based on the 01_video_encode sample
and this V4L2 capture sample source:
https://linuxtv.org/downloads/v4l-dvb-apis/uapi/v4l/capture.c.html

Our system:
USB (UVC) input → V4L2 Capture → NvVideoConverter → NvVideoEncoder → RTP / UDP

To copy frames from your image SDK into an NvBuffer, you can use memcpy() or cudaMemcpy2D().
If you need a format conversion such as RGB/YUV or YUV422/YUV420, it is better to use NvVideoConverter.
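
For reference, here is a rough sketch of creating and configuring the converter and encoder elements. The resolution, pixel formats, bitrate, and buffer counts below are assumptions; the 01_video_encode and frontend samples show the complete setup.

// Sketch only: error checks omitted; sizes and formats are assumptions.
NvVideoConverter *conv = NvVideoConverter::createVideoConverter("conv0");
NvVideoEncoder *enc = NvVideoEncoder::createVideoEncoder("enc0");

// conv0: UYVY (YUV422) in, pitch-linear YUV420M out.
conv->setOutputPlaneFormat(V4L2_PIX_FMT_UYVY, 1920, 1080,
		V4L2_NV_BUFFER_LAYOUT_PITCH);
conv->setCapturePlaneFormat(V4L2_PIX_FMT_YUV420M, 1920, 1080,
		V4L2_NV_BUFFER_LAYOUT_PITCH);

// enc0: YUV420M in, H.264 out (set the capture format first).
enc->setCapturePlaneFormat(V4L2_PIX_FMT_H264, 1920, 1080, 2 * 1024 * 1024);
enc->setOutputPlaneFormat(V4L2_PIX_FMT_YUV420M, 1920, 1080);
enc->setBitrate(4 * 1000 * 1000);
enc->setFrameRate(30, 1);

// Allocate/map the V4L2 buffers; the encoder output plane imports the
// converter's capture buffers via qBuffer(v4l2_buf, sbuffer) shown below.
conv->output_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);
conv->capture_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);
enc->output_plane.setupPlane(V4L2_MEMORY_DMABUF, 10, false, false);
enc->capture_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);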

Below is a summary of our sources.

V4L2 Capture

// Excerpt; needs <cstring>, <cstdint> and <cuda_runtime.h> (for cudaMemcpy2D()).
void capV4L2::process_image(void *p, int length, uint64_t ts) {
	unsigned char *pDataOut = (unsigned char *) p;
	qData_t *outBuf;

	outBuf = (qData_t*) outBufferPool.getEmptyBuffer(100);
	if (outBuf) {
		outBuf->timestamp_us = ts;

		for (auto i = 0; i < 3; i++) { // Y, U, V
			int datasize = outBuf->plane[i].bytesperpixel
					* outBuf->plane[i].width;
			uint8_t *data = (uint8_t *) outBuf->plane[i].data;
			outBuf->plane[i].bytesused = 0;

			if (outBuf->plane[i].stride == datasize) {
				// Stride equals row size: copy the whole plane at once
				memcpy(data, pDataOut, datasize * outBuf->plane[i].height);
				pDataOut += datasize * outBuf->plane[i].height;
			} else {
				// Padded stride (pitch linear): copy row by row
#if 0
				for (auto j = 0; j < outBuf->plane[i].height; j++) {
					memcpy(data, pDataOut, datasize);
					data += outBuf->plane[i].stride;
					pDataOut += datasize;
				}
#else
				cudaMemcpy2D(data, outBuf->plane[i].stride, pDataOut, datasize, datasize,
					outBuf->plane[i].height, cudaMemcpyHostToHost);
				pDataOut += datasize * outBuf->plane[i].height;
#endif
			}
			outBuf->plane[i].bytesused = outBuf->plane[i].stride
					* outBuf->plane[i].height;
		}
		outBufferPool.pushFilledBuffer(outBuf);
	}
}
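
If your camera SDK delivers frames through a callback, you can bridge it to process_image() like this (a minimal sketch; onSdkFrame() and gCap are placeholders for whatever your SDK and application provide):

// Hypothetical SDK callback: the names here are placeholders, not a real API.
static capV4L2 *gCap;	// the capture/buffer-pool object shown above

void onSdkFrame(void *frameData, int frameLength) {
	uint64_t ts_us = getTime();	// capture timestamp in microseconds
	gCap->process_image(frameData, frameLength, ts_us);
}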

VideoConverter input

int VideoConverter::inBuf_to_conv0() {
	int ret;
	struct v4l2_buffer v4l2_buf;
	struct v4l2_plane planes[MAX_PLANES];
	NvBuffer *buffer;
	qData_t *inBuf;

retry:
	memset(&v4l2_buf, 0, sizeof(v4l2_buf));
	memset(planes, 0, sizeof(planes));

	v4l2_buf.m.planes = planes;

	if ((inBuf = (qData_t*)inBufferPool->getFilledBuffer(40)) == NULL) {
		if (!got_error && !mConv0->isInError() && !eos) {
			return 0;
		}
		return -1;
	}

	v4l2_buf.index = inBuf->index;
	v4l2_buf.timestamp.tv_sec = inBuf->timestamp_us / 1000000;
	v4l2_buf.timestamp.tv_usec = inBuf->timestamp_us % 1000000;
	buffer = (NvBuffer *) inBuf->nvBuf;
	for (auto i = 0; i < buffer->n_planes; i++) {
		NvBuffer::NvBufferPlane &plane = buffer->planes[i];
		plane.bytesused = inBuf->plane[i].bytesused;
	}

	// Serialize access to conv0's output plane; give up on error or EOS.
	while (!conv0_mtx.try_lock_for(std::chrono::milliseconds(100))) {
		if (got_error || mConv0->isInError() || eos) {
			delete inBuf;
			return -1;
		}
	}
	if (m_debug) debugPrint("[CNV0] inBuf_to_conv0(): Locked");

	// Throttle conv0: while more than one buffer is already queued, wait;
	// drop the frame if it is older than 500 ms and the pool has run dry.
	while ((mConv0->output_plane.getNumQueuedBuffers() > 1) /* && inBufferPool->getEmptyBufferSize() != 0 */) {
		uint64_t now_t = getTime();
		if ((inBuf->timestamp_us + (500 * 1000) < now_t)
				&& inBufferPool->getEmptyBufferSize() == 0) {
			for (auto i = 0; i < buffer->n_planes; i++) {
				inBuf->plane[i].bytesused = 0;
			}
			inBufferPool->pushEmptyBuffer(inBuf);
			goto retry;
		}
		std::this_thread::sleep_for(std::chrono::milliseconds(5));
	}
	delete inBuf;
	ret = mConv0->output_plane.qBuffer(v4l2_buf, NULL);
	if (ret < 0) {
		cerr << "[CNV0] inBuf_to_conv0(): Error while queueing buffer at conv0 output plane"
				<< endl;
		abort();
		return -1;
	}
	return 1;
}
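
Converted frames are picked up on conv0's capture plane by a dequeue-thread callback and handed to the encoder stage. Below is a rough sketch (encInPool and the qData_t fields are assumptions inferred from the code above, and the bookkeeping that queues the buffer back to conv0's capture plane once the encoder is done is omitted):

static bool conv0_capture_dqbuf_callback(struct v4l2_buffer *v4l2_buf,
		NvBuffer *buffer, NvBuffer *shared_buffer, void *arg) {
	VideoConverter *self = (VideoConverter *) arg;

	if (!v4l2_buf)
		return false;	// error or EOS: stop the DQ thread

	// Hand the converted NvBuffer to the encoder stage; inBuf_to_enc()
	// below reads it back as inBuf->data.
	qData_t *outBuf = (qData_t *) self->encInPool->getEmptyBuffer(100);
	if (outBuf) {
		outBuf->data = buffer;
		outBuf->timestamp_us = (uint64_t) v4l2_buf->timestamp.tv_sec * 1000000
				+ v4l2_buf->timestamp.tv_usec;
		self->encInPool->pushFilledBuffer(outBuf);
	}
	return true;
}

It is registered with mConv0->capture_plane.setDQThreadCallback() and started with mConv0->capture_plane.startDQThread(this).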

VideoEncoder input

int VideoEncoder::inBuf_to_enc() {
	int ret;

	NvBuffer *buffer = nullptr;
	NvBuffer *sbuffer = nullptr;
	qData_t *inBuf = nullptr;
	struct v4l2_buffer v4l2_buf;
	struct v4l2_plane planes[MAX_PLANES];
	memset(&v4l2_buf, 0, sizeof(v4l2_buf));
	memset(planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));

	if(prevConv){
		if ((inBuf = (qData_t*)inBufferPool->getFilledBuffer(30)) == NULL) {
			if(!mEnc->isInError() && !myExitFlag && !eos){
				return 0;
			}
			return -1;
		}
		sbuffer = (NvBuffer *)inBuf->data;	// NvBuffer handed over from the converter stage
		v4l2_buf.timestamp.tv_sec = inBuf->timestamp_us / 1000000;
		v4l2_buf.timestamp.tv_usec = inBuf->timestamp_us % 1000000;
		v4l2_buf.sequence = seq;
#if 1		// 2018/03/22: propagate the output timestamp to the encoded capture buffer
		v4l2_buf.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY;
#endif
	}

	buffer = getOutNvBuffer(50);
	if(!buffer){
		// debug log output
	} else {
		v4l2_buf.index = buffer->index;
		v4l2_buf.m.planes = planes;

		// Queue with sbuffer so the encoder consumes the shared (converter) buffer.
		ret = mEnc->output_plane.qBuffer(v4l2_buf, sbuffer);
		if (ret < 0) {
			cerr << "Error while queueing buffer at output plane" << endl;
			return -1;
		}
		seq++;
	}

	if(prevConv){
		inBufferPool->pushEmptyBuffer(inBuf);
	}

	return 1;
}
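
On the encoder's capture plane, a similar dequeue callback takes the encoded H.264 bitstream and passes it to the RTP/UDP sender. A rough sketch (sendRtp() stands in for our packetizer, which is not shown):

static bool enc_capture_dqbuf_callback(struct v4l2_buffer *v4l2_buf,
		NvBuffer *buffer, NvBuffer *shared_buffer, void *arg) {
	VideoEncoder *self = (VideoEncoder *) arg;

	if (!v4l2_buf)
		return false;	// encoder error: stop the DQ thread

	// planes[0] holds the encoded bitstream for one access unit.
	self->sendRtp(buffer->planes[0].data, buffer->planes[0].bytesused,
			v4l2_buf->timestamp);

	// Return the capture buffer to the encoder so it can be refilled.
	if (self->mEnc->capture_plane.qBuffer(*v4l2_buf, NULL) < 0)
		return false;
	return true;
}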

For your reference.