About the nano dec 4k jpeg fps

hi all ,i know that nano dec video Maximum performance is 1x 4K @ 60 (HEVC),but whether that dec jpeg is also 4k 60fps jpeg?

Hi,
It is not verified and may not achieve 4Kp60 in JPEG decoding. Suggest use H264 or H265 hardware decoder.

thanks for your help DaneLLL,i test dec a jpeg(3840*2160) cost about 43ms with using decodeToFd,so if want to enc to h264 for 25fps,but the encode 264 must use the virtual add ,so i have to copy the data from the decode dma fd,that may be cost about 12ms ,then the dec will cost about 55ms ,that will can not enc to h264 for 25fps ,i want to know whether the video encoder can do like the jpeg encoder use the encodetofd,so i do not cost the copy data time

Hi,
We support the mode. Please check the option in 01_video_encode:

        -mem_type_oplane <num> Specify memory type for the output plane to be used [1 = V4L2_MEMORY_MMAP, 2 = V4L2_MEMORY_USERPTR, 3 = V4L2_MEMORY_DMABUF]

Setting memory type to V4L2_MEMORY_DMABUF is to feed dmabuf fd to encoder. You can create dmabuf( by calling NvBufferCreateEx() ) and copy the decoded fd to the buffers by calling NvBufferTransform(). It utilizes hardware engine and performance is optimal.

so if i decode from jpeg use the decodeToFd ,whether i can use the dec dma fd directly ,do not use the NvBufferTransform copy ,because i test found that use the NvBufferTransform do i420 covert to NV21 cost about 12ms,so if i use the NvBufferTransform copy that may cost 12ms

Hi,
NV21 is not supported by hardware encoder. You need the buffer in I420 or NV12.

And it should work without NvBufferTransform(). Please look at

static int
setup_output_dmabuf(context_t *ctx, uint32_t num_buffers );

You can register the dec dma fd directly like:

ret = ctx->enc->output_plane.reqbufs(V4L2_MEMORY_DMABUF,num_buffers);
for (uint32_t i = 0; i < ctx->enc->output_plane.getNumBuffers(); i++)
{
    ctx->output_plane_fd[i]=_DEC_DMA_FD_;
}

thanks for your help ,my video enc is base on the 03_video_cuda_enc,i think just like you modify setup_output_dmabuf ,i do not know how to do the following source code in the15_multivideo_encode,whether if i can do not use the following code

for (uint32_t i = 0; i < ctx.enc->output_plane.getNumBuffers(); i++)
{
    struct v4l2_buffer v4l2_buf;
    struct v4l2_plane planes[MAX_PLANES];
    NvBuffer *buffer = ctx.enc->output_plane.getNthBuffer (i);

    memset (&v4l2_buf, 0, sizeof(v4l2_buf));
    memset (planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));

    v4l2_buf.index = i;
    v4l2_buf.m.planes = planes;

    if(ctx.output_memory_type == V4L2_MEMORY_DMABUF)
    {
        v4l2_buf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
        v4l2_buf.memory = V4L2_MEMORY_DMABUF;
        /* Map output plane buffer for memory type DMABUF. */
        ret = ctx.enc->output_plane.mapOutputBuffers (v4l2_buf, ctx.output_plane_fd[i]);

        if (ret < 0)
        {
            cerr << "Error while mapping buffer at output plane" << endl;
            abort (&ctx);
            goto cleanup;
        }
    }

    /* Read yuv frame data from input file */
    if (read_video_frame (ctx.in_file, *buffer) < 0)
    {
        cerr << "Could not read complete frame from input file" << endl;
        v4l2_buf.m.planes[0].bytesused = 0;

        eos = true;
        v4l2_buf.m.planes[0].m.userptr = 0;
        v4l2_buf.m.planes[0].bytesused = v4l2_buf.m.planes[1].bytesused
                                       = v4l2_buf.m.planes[2].bytesused = 0;
    }

    if (ctx.output_memory_type == V4L2_MEMORY_DMABUF ||
        ctx.output_memory_type == V4L2_MEMORY_MMAP)
    {
        for (uint32_t j = 0 ; j < buffer->n_planes; j++)
        {
            ret = NvBufferMemSyncForDevice (buffer->planes[j].fd, j,
                                        (void **)&buffer->planes[j].data);
            if (ret < 0)
            {
                cerr << "Error while NvBufferMemSyncForDevice at "
                        "output plane for V4L2_MEMORY_DMABUF" << endl;
                abort (&ctx);
                goto cleanup;
            }
        }
    }

    if (ctx.output_memory_type == V4L2_MEMORY_DMABUF)
    {
        for (uint32_t j = 0 ; j < buffer->n_planes ; j++)
        {
            v4l2_buf.m.planes[j].bytesused = buffer->planes[j].bytesused;
        }
    }
    /* encoder qbuffer for output plane */
    ret = ctx.enc->output_plane.qBuffer (v4l2_buf, NULL);
    if (ret < 0)
    {
        cerr << "Error while queueing buffer at output plane" << endl;
        abort (&ctx);
        goto cleanup;
    }
    if (v4l2_buf.m.planes[0].bytesused == 0)
    {
        cerr << "File read complete." << endl;
        eos = true;
        break;
    }
    ctx.input_frames_queued_count++;
}

Hi,
You don’t need to call mapOutputBuffers() since your data is in the buffer already. The function call is for copying data from file.

that following is my interface ,but that run error:
NvMapMemCacheMaint:1075334668 failed [14]
NvMapMemCacheMaint:1075334668 failed [14]
File read complete
int video_encode_putdata_from_dma(PMAX_ENCODE_VIDEO_CONTEXT pContext, int dec_dma_fd)
{
int ret = 0;

if(!pContext)
{
	cout<<"video encode not init!!!"<<endl;
	return -1;
}

context_t *ctx = (context_t *)pContext->object;
if(!ctx)
{
	cout<<"ctx is NULL!!!"<<endl;
	return -1;
}

if(ctx->got_firstframe == 0)
{
	ctx->enc->capture_plane.startDQThread(ctx);
	    /* Enqueue all the empty capture plane buffers */
    //for (uint32_t i = 0; i < ctx->enc->capture_plane.getNumBuffers(); i++)
    {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];

        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));

        v4l2_buf.index = 0;//i;
        v4l2_buf.m.planes = planes;

        ret = ctx->enc->capture_plane.qBuffer(v4l2_buf, NULL);
        if (ret < 0)
        {
            cerr << "Error while queueing buffer at capture plane" << endl;
            abort(ctx);
        }
    }

    /* Read video frame and queue all the output plane buffers */
    //for (uint32_t i = 0; i < ctx->enc->output_plane.getNumBuffers() &&
    //        !ctx->got_error; i++)
    {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];
        NvBuffer *buffer = ctx->enc->output_plane.getNthBuffer(0);
        int fd;
        void **dat;

        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, MAX_PLANES * sizeof(struct v4l2_plane));

        v4l2_buf.index = 0;//i;
        v4l2_buf.m.planes = planes;

        /**
         * buffer is touched by CPU in read_video_frame(), so NvBufferMemSyncForDevice()
         * is needed to flash cached data to memory.
         */
        fd = buffer->planes[0].fd = dec_dma_fd;
        for (uint32_t j = 0 ; j < buffer->n_planes ; j++)
        {
            dat = (void **)&buffer->planes[j].data;
            ret = NvBufferMemSyncForDevice (fd, j, dat);
            if (ret < 0)
            {
                cerr << "Error while NvBufferMemSyncForDevice at output plane" << endl;
                abort(ctx);
            }
        }

        ret = ctx->enc->output_plane.qBuffer(v4l2_buf, NULL);
        if (ret < 0)
        {
            cerr << "Error while queueing buffer at output plane" << endl;
            abort(ctx);
        }

        if (v4l2_buf.m.planes[0].bytesused == 0)
        {
            cerr << "File read complete." << endl;
        }
		ctx->got_firstframe = 1;
    }
}
else
{
	/* Keep reading input till EOS is reached */
    if (!ctx->got_error && !ctx->enc->isInError())
    {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];
        NvBuffer *buffer;
        int fd;
        void **dat;

        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, sizeof(planes));

        v4l2_buf.m.planes = planes;

        if (ctx->enc->output_plane.dqBuffer(v4l2_buf, &buffer, NULL, 10) < 0)
        {
            cerr << "ERROR while DQing buffer at output plane" << endl;
            abort(ctx);
        }

        /**
         * buffer is touched by CPU in read_video_frame(), so NvBufferMemSyncForDevice()
         * is needed to flash cached data to memory.
         */
        fd = buffer->planes[0].fd = dec_dma_fd;
        for (uint32_t j = 0 ; j < buffer->n_planes ; j++)
        {
            dat = (void **)&buffer->planes[j].data;
            ret = NvBufferMemSyncForDevice (fd, j, dat);
            if (ret < 0)
            {
                cerr << "Error while NvBufferMemSyncForDevice at output plane" << endl;
                abort(ctx);
            }
        }

        ret = ctx->enc->output_plane.qBuffer(v4l2_buf, NULL);
        if (ret < 0)
        {
            cerr << "Error while queueing buffer at output plane" << endl;
            abort(ctx);
        }

    }
}

return ret;

}

Hi,
If there is difficulty in integration, you may keep NvBufferTransform() and try to set VIC at max clocks:
Nvvideoconvert issue, nvvideoconvert in DS4 is better than Ds5?

See if it gives performance improvement.
Actually for achieving 4K 25fps, the bottleneck looks to be in JPEG decoding, not VIC.

yes ,but i just want to Minimize data conversion time,so use the dec dma fd to enc directly,that have not sample demo ,just can be tryed!