/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "NvUtils.h"
#include "NvCudaProc.h"
#include "nvbuf_utils.h"

// NOTE(review): the angle-bracket header names of the system includes were
// missing from this copy of the file. The list below is reconstructed from
// the identifiers this file actually uses (errno, printf/snprintf,
// memset/memcpy, usleep, pthread_*, prctl, v4l2_*, ofstream, cout/cerr,
// queue) -- confirm against the original sources.
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <linux/videodev2.h>
#include <fstream>
#include <iostream>
#include <queue>

#include "myDefine.h"
#include "nvosd.h"
#include "CudaDecode.h"

// Print `str`, set the local `error` flag and jump to `label` when `cond` holds.
// The enclosing function must declare `int error` and the target label.
#define TEST_ERROR(cond, str, label) if(cond) { \
                                        printf("%s\n", str); \
                                        error = 1; \
                                        goto label; }

// Print `str` and return FALSE from the enclosing function when `cond` holds.
#define ERROR_HANDLE(cond, str) if(cond) { \
                                        printf("%s\n", str);\
                                        return FALSE; }

// Size in bytes requested for each decoder output-plane (bitstream) buffer;
// passed to setOutputPlaneFormat() in Open().
#define CHUNK_SIZE 4000000

#define MIN(a,b) (((a) < (b)) ? (a) : (b))

// True when buffer_ptr points at the 4-byte start code 00 00 00 01.
#define IS_NAL_UNIT_START(buffer_ptr) (!buffer_ptr[0] && !buffer_ptr[1] && \
        !buffer_ptr[2] && (buffer_ptr[3] == 1))

// True when buffer_ptr points at the 3-byte start code 00 00 01.
#define IS_NAL_UNIT_START1(buffer_ptr) (!buffer_ptr[0] && !buffer_ptr[1] && \
        (buffer_ptr[2] == 1))

using namespace std;

extern void bail(const char *on_what);
extern void BailIfNotEqual(const char *on_what, int iRet, const int iValue = 0);

/**
 * Flag the context as failed and abort the decoder (and converter, if any).
 *
 * The condition-variable broadcast is issued while holding queue_lock so that
 * a thread which has just tested got_error under the lock cannot miss the
 * wake-up. Callers must therefore NOT hold queue_lock when calling this.
 *
 * @param ctx  decode context to mark as failed.
 */
void abort(context_t *ctx)
{
    ctx->got_error = true;

    if (ctx->dec)
    {
        ctx->dec->abort();
    }

    if (ctx->conv)
    {
        ctx->conv->abort();
        pthread_mutex_lock(&ctx->queue_lock);
        pthread_cond_broadcast(&ctx->queue_cond);
        pthread_mutex_unlock(&ctx->queue_lock);
    }
}

/**
 * Block until a free converter output-plane buffer is available and remove it
 * from conv_output_plane_buf_queue.
 *
 * @param pCtx  decode context owning the queue, its mutex and condition.
 * @return the buffer taken from the front of the queue, or NULL when the
 *         queue is empty and got_error has been set (so the caller does not
 *         wait forever after abort()).
 */
static NvBuffer *take_free_conv_output_buffer(context_t *pCtx)
{
    NvBuffer *pFree = NULL;

    pthread_mutex_lock(&pCtx->queue_lock);
    while (pCtx->conv_output_plane_buf_queue->empty() && !pCtx->got_error)
    {
        pthread_cond_wait(&pCtx->queue_cond, &pCtx->queue_lock);
    }
    if (!pCtx->conv_output_plane_buf_queue->empty())
    {
        pFree = pCtx->conv_output_plane_buf_queue->front();
        pCtx->conv_output_plane_buf_queue->pop();
    }
    pthread_mutex_unlock(&pCtx->queue_lock);

    return pFree;
}

/**
 * DQ-thread callback for the converter OUTPUT plane.
 *
 * Puts the dequeued converter buffer back on the free queue and re-queues the
 * associated decoder capture buffer (identified by shared_buffer->index) on
 * the decoder capture plane.
 *
 * @param v4l2_buf       dequeued V4L2 buffer; NULL signals a dequeue failure.
 * @param buffer         converter output-plane buffer that was dequeued.
 * @param shared_buffer  decoder capture buffer that was shared with it.
 * @param arg            the context_t* passed to startDQThread().
 * @return false on failure or when the buffer carries 0 bytes (this file
 *         queues such a buffer as EOS); true otherwise.
 */
bool conv0_output_dqbuf_thread_callback(struct v4l2_buffer *v4l2_buf,
                                        NvBuffer * buffer,
                                        NvBuffer *shared_buffer,
                                        void *arg)
{
    context_t *pCtx = (context_t *) arg;
    struct v4l2_buffer dec_capture_ret_buffer;
    struct v4l2_plane planes[MAX_PLANES];

    if (!v4l2_buf)
    {
        cerr << "Failed to dequeue buffer from conv0 output plane" << endl;
        abort(pCtx);
        return false;
    }

    if (v4l2_buf->m.planes[0].bytesused == 0)
    {
        return false;
    }

    memset(&dec_capture_ret_buffer, 0, sizeof(dec_capture_ret_buffer));
    memset(planes, 0, sizeof(planes));

    dec_capture_ret_buffer.index = shared_buffer->index;
    dec_capture_ret_buffer.m.planes = planes;

    pthread_mutex_lock(&pCtx->queue_lock);
    pCtx->conv_output_plane_buf_queue->push(buffer);

    // Return the buffer dequeued from converter output plane
    // back to decoder capture plane
    if (pCtx->dec->capture_plane.qBuffer(dec_capture_ret_buffer, NULL) < 0)
    {
        // Release the lock before bailing out: returning with it held would
        // block every later user of the queue, and abort() takes it itself.
        pthread_mutex_unlock(&pCtx->queue_lock);
        abort(pCtx);
        return false;
    }

    pthread_cond_broadcast(&pCtx->queue_cond);
    pthread_mutex_unlock(&pCtx->queue_lock);

    return true;
}

/**
 * DQ-thread callback for the converter CAPTURE plane.
 *
 * Wraps the dequeued frame's dmabuf fd in an EGLImage, hands it to
 * HandleEGLImage(), destroys the image, and re-queues the buffer on the
 * converter capture plane.
 *
 * @param v4l2_buf       dequeued V4L2 buffer; NULL signals a dequeue failure.
 * @param buffer         converter capture-plane buffer holding the frame.
 * @param shared_buffer  unused here.
 * @param arg            the context_t* passed to startDQThread().
 * @return false on failure or when the buffer carries 0 bytes; true otherwise.
 */
bool conv0_capture_dqbuf_thread_callback(struct v4l2_buffer *v4l2_buf,
                                         NvBuffer * buffer,
                                         NvBuffer * shared_buffer,
                                         void *arg)
{
    context_t *pCtx = (context_t *) arg;

    if (!v4l2_buf)
    {
        // This callback serves the capture plane (the message used to say
        // "output plane").
        cerr << "Failed to dequeue buffer from conv0 capture plane" << endl;
        abort(pCtx);
        return false;
    }

    if (v4l2_buf->m.planes[0].bytesused == 0)
    {
        return false;
    }

    // Create EGLImage from dmabuf fd
    pCtx->egl_image = NvEGLImageFromFd(pCtx->egl_display, buffer->planes[0].fd);
    if (pCtx->egl_image == NULL)
    {
        fprintf(stderr, "Error while mapping dmabuf fd (0x%X) to EGLImage\n",
                buffer->planes[0].fd);
        return false;
    }

    // Process the frame through the EGLImage (implementation of
    // HandleEGLImage is not visible here; presumably the CUDA stage).
    HandleEGLImage(&pCtx->egl_image);

    // Destroy EGLImage
    NvDestroyEGLImage(pCtx->egl_display, pCtx->egl_image);
    pCtx->egl_image = NULL;

    // Frame counter for the (currently disabled) raw-frame dump.
    if (pCtx->out_file)
    {
        pCtx->iOutFrameCount++;
        if (pCtx->iOutFrameCount % 5 == 0)
        {
            //printf("pCtx->iOutFrameCount:%d\n", pCtx->iOutFrameCount);
            //write_video_frame(pCtx->out_file, *buffer);
        }
    }

    // Hand the buffer back to the converter capture plane.
    if (pCtx->conv->capture_plane.qBuffer(*v4l2_buf, NULL) < 0)
    {
        return false;
    }

    return true;
}

/**
 * Construct an idle decoder wrapper.
 *
 * m_ctx is zeroed so that Close()/DecodeFrame() called before (or after a
 * failed) Open() see NULL pointers instead of uninitialised memory.
 * set_defaults() clears the struct the same way.
 */
CudaDecode::CudaDecode()
{
    memset(&m_ctx, 0, sizeof(m_ctx));
    m_iInitBufCount = 0;
    m_dec_capture_loop_thread_id = 0;
    m_bInitBufComplete = FALSE;
    m_iChan = 0;
}

/** Destructor: only logs; resources are released by Close(). */
CudaDecode::~CudaDecode()
{
    printf("CudaDecode::~CudaDecode\n");
}

/**
 * Initialise EGL, create the decoder and converter, start streaming on the
 * decoder output plane and launch the capture-loop thread.
 *
 * @return TRUE on success, FALSE as soon as any step fails. Objects created
 *         before the failure are left for Close() to release.
 */
BOOL CudaDecode::Open()
{
    printf("CudaDecode::Open\n");

    set_defaults(&m_ctx);

    // Get default EGL display
    m_ctx.egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
    if (m_ctx.egl_display == EGL_NO_DISPLAY)
    {
        printf("Error while get EGL display connection\n");
        return FALSE;
    }

    // Init EGL display connection
    if (!eglInitialize(m_ctx.egl_display, NULL, NULL))
    {
        printf("Erro while initialize EGL display connection\n");
        return FALSE;
    }

    m_ctx.dec = NvVideoDecoder::createVideoDecoder("dec0");
    if (!m_ctx.dec)
    {
        printf("Could not create decoder\n");
        return FALSE;
    }

    int ret = 0;

    // Subscribe to Resolution change event
    ret = m_ctx.dec->subscribeEvent(V4L2_EVENT_RESOLUTION_CHANGE, 0, 0);
    ERROR_HANDLE(ret < 0, "Could not subscribe to V4L2_EVENT_RESOLUTION_CHANGE");

    // Set V4L2_CID_MPEG_VIDEO_DISABLE_COMPLETE_FRAME_INPUT control to false
    // so that application can send chunks of encoded data instead of forming
    // complete frames. This needs to be done before setting format on the
    // output plane.
    ret = m_ctx.dec->disableCompleteFrameInputBuffer();
    ERROR_HANDLE(ret < 0, "Error in decoder disableCompleteFrameInputBuffer");

    // Set format on the output plane
    ret = m_ctx.dec->setOutputPlaneFormat(m_ctx.decoder_pixfmt, CHUNK_SIZE);
    ERROR_HANDLE(ret < 0, "Could not set output plane format");

    // Query, Export and Map the output plane buffers so that we can read
    // encoded data into the buffers
    ret = m_ctx.dec->output_plane.setupPlane(V4L2_MEMORY_MMAP, 10, true, false);
    ERROR_HANDLE(ret < 0, "Error while setting up output plane");

    // Create converter to convert from BL to PL for writing raw video
    // to file or crop the frame and display
    m_ctx.conv = NvVideoConverter::createVideoConverter("dev0");
    ERROR_HANDLE(!m_ctx.conv, "Could not create video converter");

    m_ctx.conv->output_plane.setDQThreadCallback(conv0_output_dqbuf_thread_callback);
    m_ctx.conv->capture_plane.setDQThreadCallback(conv0_capture_dqbuf_thread_callback);

    ret = m_ctx.dec->output_plane.setStreamStatus(true);
    ERROR_HANDLE(ret < 0, "Error in output plane stream on");

    printf("create_dec_capture_loop_thread before\n");
    this->create_dec_capture_loop_thread();

    printf("CudaDecode::Open() end\n");
    return TRUE;
}

/**
 * Drain the decoder output plane, stop the worker threads and release
 * everything Open() created.
 *
 * Safe to call after a failed Open(): missing objects are skipped and every
 * released pointer is reset to NULL.
 *
 * @return FALSE when eglTerminate() fails, TRUE otherwise.
 */
BOOL CudaDecode::Close()
{
    int ret = 0;

    // Dequeue every buffer still queued on the decoder output plane before
    // telling the capture loop to stop.
    while (m_ctx.dec
           && m_ctx.dec->output_plane.getNumQueuedBuffers() > 0
           && !m_ctx.got_error
           && !m_ctx.dec->isInError())
    {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];

        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, sizeof(planes));
        v4l2_buf.m.planes = planes;

        ret = m_ctx.dec->output_plane.dqBuffer(v4l2_buf, NULL, NULL, -1);
        if (ret < 0)
        {
            printf("Error DQing buffer at output plane\n");
            abort(&m_ctx);
            break;
        }
    }

    // Signal EOS to the decoder capture loop
    m_ctx.got_eos = true;

    if (m_ctx.conv)
    {
        m_ctx.conv->capture_plane.waitForDQThread(-1);
    }

    // Only join a thread that was actually created (id is 0 otherwise).
    if (m_dec_capture_loop_thread_id != 0)
    {
        pthread_join(m_dec_capture_loop_thread_id, NULL);
        m_dec_capture_loop_thread_id = 0;
    }

    // The decoder destructor does all the cleanup i.e set streamoff on output and capture planes,
    // unmap buffers, tell decoder to deallocate buffer (reqbufs ioctl with count = 0),
    // and finally call v4l2_close on the fd.
    delete m_ctx.dec;
    m_ctx.dec = NULL;

    delete m_ctx.conv;
    m_ctx.conv = NULL;

    // Allocated with `new ofstream` in set_defaults(); all threads that read
    // it have stopped by now.
    delete m_ctx.out_file;
    m_ctx.out_file = NULL;

    delete m_ctx.conv_output_plane_buf_queue;
    m_ctx.conv_output_plane_buf_queue = NULL;

    // Terminate EGL display connection
    if (m_ctx.egl_display)
    {
        if (!eglTerminate(m_ctx.egl_display))
        {
            fprintf(stderr, "Error while terminate EGL display connection\n");
            return FALSE;
        }
        m_ctx.egl_display = EGL_NO_DISPLAY;
    }

    return TRUE;
}

/**
 * Queue a zero-length (EOS) buffer on the converter output plane, if the
 * converter output plane is streaming.
 *
 * @param pCtx  decode context.
 * @return the qBuffer() result when an EOS buffer was queued, 0 when the
 *         converter is not streaming, -1 when no free buffer could be
 *         obtained because the context is in the error state.
 */
int CudaDecode::sendEOStoConverter(context_t *pCtx)
{
    // Check if converter is running
    if (!pCtx->conv->output_plane.getStreamStatus())
    {
        return 0;
    }

    struct v4l2_buffer v4l2_buf;
    struct v4l2_plane planes[MAX_PLANES];

    memset(&v4l2_buf, 0, sizeof(v4l2_buf));
    memset(&planes, 0, sizeof(planes));
    v4l2_buf.m.planes = planes;

    NvBuffer *conv_buffer = take_free_conv_output_buffer(pCtx);
    if (conv_buffer == NULL)
    {
        return -1;
    }

    v4l2_buf.index = conv_buffer->index;

    // Queue EOS buffer on converter output plane (bytesused stays 0).
    return pCtx->conv->output_plane.qBuffer(v4l2_buf, NULL);
}

/**
 * Copy one chunk of encoded data into a decoder output-plane buffer.
 *
 * The caller is responsible for making sure iFrameSize fits in the buffer
 * (DecodeFrame() checks it against CHUNK_SIZE).
 *
 * @param buffer      destination decoder buffer (plane 0 is written).
 * @param pFrame      source bytes.
 * @param iFrameSize  number of bytes to copy.
 */
void CudaDecode::read_decoder_input_chunk(NvBuffer *buffer, unsigned char* pFrame, int iFrameSize)
{
    if (iFrameSize > 0 && pFrame != NULL)
    {
        memcpy((char *) buffer->planes[0].data, pFrame, iFrameSize);
    }

    // It is necessary to set bytesused properly, so that decoder knows how
    // many bytes in the buffer are valid
    buffer->planes[0].bytesused = iFrameSize;
}

/**
 * React to a resolution-change event: rebuild the decoder capture plane and
 * the converter planes for the new format, then start streaming and queue all
 * empty capture buffers.
 *
 * @param pCtx  decode context.
 * @return TRUE on success; FALSE after calling abort(pCtx) on any failure.
 */
BOOL CudaDecode::query_and_set_capture(context_t *pCtx)
{
    NvVideoDecoder *dec = pCtx->dec;
    struct v4l2_format format;
    struct v4l2_crop crop;
    int32_t min_dec_capture_buffers;
    int ret = 0;
    int error = 0;

    // Get capture plane format from the decoder. This may change after
    // a resolution change event.
    ret = dec->capture_plane.getFormat(format);
    TEST_ERROR(ret < 0,
               "Error: Could not get format from decoder capture plane", error);

    // Get the display resolution of the decoded video from the decoder.
    ret = dec->capture_plane.getCrop(crop);
    TEST_ERROR(ret < 0,
               "Error: Could not get crop from decoder capture plane", error);

    cout << "Video Resolution: " << crop.c.width << "x" << crop.c.height << endl;

    // First deinitialize output and capture planes of the video converter so
    // they can be set up again with the new resolution below.
    if (pCtx->conv)
    {
        ret = sendEOStoConverter(pCtx);
        TEST_ERROR(ret < 0,
                   "Error while queueing EOS buffer on converter output",
                   error);

        // Wait for EOS buffer to arrive on capture plane
        pCtx->conv->capture_plane.waitForDQThread(2000);

        pCtx->conv->output_plane.deinitPlane();
        pCtx->conv->capture_plane.deinitPlane();

        // The old converter buffers are gone; forget them.
        while (!pCtx->conv_output_plane_buf_queue->empty())
        {
            pCtx->conv_output_plane_buf_queue->pop();
        }
    }

    // deinitPlane unmaps the buffers and calls REQBUFS with count 0
    dec->capture_plane.deinitPlane();

    // Not necessary to call VIDIOC_S_FMT on decoder capture plane.
    // But decoder setCapturePlaneFormat function updates the class variables
    ret = dec->setCapturePlaneFormat(format.fmt.pix_mp.pixelformat,
                                     format.fmt.pix_mp.width,
                                     format.fmt.pix_mp.height);
    TEST_ERROR(ret < 0, "Error in setting decoder capture plane format", error);

    // Get the minimum buffers which have to be requested on the capture plane
    ret = dec->getMinimumCapturePlaneBuffers(min_dec_capture_buffers);
    TEST_ERROR(ret < 0,
               "Error while getting value of minimum capture plane buffers",
               error);

    // Request (min + 5) buffers, export and map buffers
    ret = dec->capture_plane.setupPlane(V4L2_MEMORY_MMAP,
                                        min_dec_capture_buffers + 5, false,
                                        false);
    TEST_ERROR(ret < 0, "Error in decoder capture plane setup", error);

    // Set the converter up again for the new resolution.
    if (pCtx->conv)
    {
        ret = pCtx->conv->setOutputPlaneFormat(format.fmt.pix_mp.pixelformat,
                                               format.fmt.pix_mp.width,
                                               format.fmt.pix_mp.height,
                                               V4L2_NV_BUFFER_LAYOUT_BLOCKLINEAR);
        TEST_ERROR(ret < 0, "Error in converter output plane set format",
                   error);

        ret = pCtx->conv->setCapturePlaneFormat((pCtx->out_pixfmt == 1 ?
                                                    V4L2_PIX_FMT_NV12M :
                                                    V4L2_PIX_FMT_YUV420M),
                                                crop.c.width,
                                                crop.c.height,
                                                V4L2_NV_BUFFER_LAYOUT_PITCH);
        TEST_ERROR(ret < 0, "Error in converter capture plane set format",
                   error);

        ret = pCtx->conv->setCropRect(0, 0, crop.c.width, crop.c.height);
        TEST_ERROR(ret < 0, "Error while setting crop rect", error);

        // Converter output buffers are DMABUF-backed and match the decoder
        // capture plane one-to-one in count.
        ret = pCtx->conv->output_plane.setupPlane(V4L2_MEMORY_DMABUF,
                                                  dec->capture_plane.getNumBuffers(),
                                                  false, false);
        TEST_ERROR(ret < 0, "Error in converter output plane setup", error);

        ret = pCtx->conv->capture_plane.setupPlane(V4L2_MEMORY_MMAP,
                                                   dec->capture_plane.getNumBuffers(),
                                                   true, false);
        TEST_ERROR(ret < 0, "Error in converter capture plane setup", error);

        ret = pCtx->conv->output_plane.setStreamStatus(true);
        TEST_ERROR(ret < 0, "Error in converter output plane streamon", error);

        ret = pCtx->conv->capture_plane.setStreamStatus(true);
        TEST_ERROR(ret < 0, "Error in converter capture plane streamon", error);

        // Add all empty conv output plane buffers to conv_output_plane_buf_queue
        for (uint32_t i = 0; i < pCtx->conv->output_plane.getNumBuffers(); i++)
        {
            pCtx->conv_output_plane_buf_queue->push(
                pCtx->conv->output_plane.getNthBuffer(i));
        }

        // Queue every converter capture buffer so the converter can fill them.
        for (uint32_t i = 0; i < pCtx->conv->capture_plane.getNumBuffers(); i++)
        {
            struct v4l2_buffer v4l2_buf;
            struct v4l2_plane planes[MAX_PLANES];

            memset(&v4l2_buf, 0, sizeof(v4l2_buf));
            memset(planes, 0, sizeof(planes));

            v4l2_buf.index = i;
            v4l2_buf.m.planes = planes;

            ret = pCtx->conv->capture_plane.qBuffer(v4l2_buf, NULL);
            TEST_ERROR(ret < 0, "Error Qing buffer at converter capture plane",
                       error);
        }

        pCtx->conv->output_plane.startDQThread(pCtx);
        pCtx->conv->capture_plane.startDQThread(pCtx);
    }

    // Capture plane STREAMON
    ret = dec->capture_plane.setStreamStatus(true);
    TEST_ERROR(ret < 0, "Error in decoder capture plane streamon", error);

    // Enqueue all the empty capture plane buffers
    for (uint32_t i = 0; i < dec->capture_plane.getNumBuffers(); i++)
    {
        struct v4l2_buffer v4l2_buf;
        struct v4l2_plane planes[MAX_PLANES];

        memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        memset(planes, 0, sizeof(planes));

        v4l2_buf.index = i;
        v4l2_buf.m.planes = planes;

        ret = dec->capture_plane.qBuffer(v4l2_buf, NULL);
        TEST_ERROR(ret < 0, "Error Qing buffer at decoder capture plane", error);
    }

    cout << "Query and set capture successful" << endl;
    return TRUE;

error:
    if (error)
    {
        abort(pCtx);
        cerr << "Error in " << __func__ << endl;
    }
    return FALSE;
}

/**
 * pthread entry point: runs CudaDecode::dec_capture_loop_fcn() on the object
 * passed as `arg`.
 */
void* dec_capture_loop_thread(void *arg)
{
    CudaDecode* pClass = (CudaDecode *)arg;
    pClass->dec_capture_loop_fcn();
    return NULL;
}

/**
 * Start the decoder capture-loop thread; the thread id is stored in
 * m_dec_capture_loop_thread_id. The pthread_create() result is handed to
 * BailIfNotEqual() (defined elsewhere).
 */
void CudaDecode::create_dec_capture_loop_thread()
{
    int threadResult = pthread_create(&m_dec_capture_loop_thread_id, NULL,
                                      dec_capture_loop_thread, this);
    BailIfNotEqual("create thread error!", threadResult, 0);
}

/**
 * Body of the decoder capture-loop thread.
 *
 * Waits for the first resolution-change event, configures the capture side,
 * then keeps dequeuing decoded buffers and forwarding them to the converter
 * (or straight back to the decoder when there is no converter) until an
 * error or EOS is flagged. Finally sends EOS to the converter.
 */
void CudaDecode::dec_capture_loop_fcn()
{
    context_t *pCtx = &m_ctx;
    NvVideoDecoder *dec = pCtx->dec;
    struct v4l2_event ev;
    int ret;
    bool bGotResolutionChange = false;

    // Start from a defined value: ev is not filled in when dqEvent fails.
    memset(&ev, 0, sizeof(ev));

    prctl(PR_SET_NAME, "dec_cap", 0, 0, 0);

    // Need to wait for the first Resolution change event, so that
    // the decoder knows the stream resolution and can allocate appropriate
    // buffers when we call REQBUFS.
    // Loop until the first resolution-change event arrives.
    while (!bGotResolutionChange)
    {
        ret = dec->dqEvent(ev, 1000);
        if (ret < 0)
        {
            if (errno == EAGAIN)
            {
                // Timed out. Give up waiting once Close() or abort() asked
                // us to stop, otherwise pthread_join() in Close() would
                // never return.
                if (pCtx->got_eos || pCtx->got_error)
                {
                    break;
                }
                continue;
            }

            printf("Error in dequeueing decoder event\n");
            abort(pCtx);
            break;
        }
        bGotResolutionChange = (ev.type == V4L2_EVENT_RESOLUTION_CHANGE);
    }

    // query_and_set_capture acts on the resolution change event
    if (bGotResolutionChange)
    {
        printf("ev.type == V4L2_EVENT_RESOLUTION_CHANGE\n");
        if (!pCtx->got_error)
        {
            query_and_set_capture(pCtx);
        }
    }

    // Exit on error or EOS (got_eos is set by Close()).
    while (!(pCtx->got_error || dec->isInError() || pCtx->got_eos))
    {
        NvBuffer *dec_buffer;

        // Check for Resolution change again (non-blocking: 0 ms wait).
        ret = dec->dqEvent(ev, 0);
        if (ret == 0 && ev.type == V4L2_EVENT_RESOLUTION_CHANGE)
        {
            query_and_set_capture(pCtx);
            continue;
        }

        while (1)
        {
            struct v4l2_buffer v4l2_buf;
            struct v4l2_plane planes[MAX_PLANES];

            memset(&v4l2_buf, 0, sizeof(v4l2_buf));
            memset(planes, 0, sizeof(planes));
            v4l2_buf.m.planes = planes;

            // Dequeue a filled buffer
            if (dec->capture_plane.dqBuffer(v4l2_buf, &dec_buffer, NULL, 0))
            {
                if (errno == EAGAIN)
                {
                    // Nothing decoded yet; back off briefly and re-check
                    // events / stop flags in the outer loop.
                    usleep(1000);
                }
                else
                {
                    abort(pCtx);
                    cerr << "Error while calling dequeue at capture plane" << endl;
                }
                break;
            }

            if (pCtx->conv)
            {
                // Give the buffer to video converter output plane
                struct v4l2_buffer conv_output_buffer;
                struct v4l2_plane conv_planes[MAX_PLANES];

                memset(&conv_output_buffer, 0, sizeof(conv_output_buffer));
                memset(conv_planes, 0, sizeof(conv_planes));
                conv_output_buffer.m.planes = conv_planes;

                // Get an empty conv output plane buffer from conv_output_plane_buf_queue
                NvBuffer *conv_buffer = take_free_conv_output_buffer(pCtx);
                if (conv_buffer == NULL)
                {
                    // Only happens once got_error is set; the outer loop
                    // condition then ends the thread.
                    cerr << "No converter output buffer available after error" << endl;
                    break;
                }

                conv_output_buffer.index = conv_buffer->index;

                if (pCtx->conv->output_plane.qBuffer(conv_output_buffer, dec_buffer) < 0)
                {
                    abort(pCtx);
                    cerr << "Error while queueing buffer at converter output plane" << endl;
                    break;
                }
            }
            else
            {
                // No converter: hand the buffer straight back to the decoder.
                if (pCtx->dec->capture_plane.qBuffer(v4l2_buf, NULL) < 0)
                {
                    abort(pCtx);
                    cerr << "Error while queueing buffer at decoder capture plane" << endl;
                    break;
                }
            }
        }
    }

    // Send EOS to converter
    if (pCtx->conv)
    {
        if (sendEOStoConverter(pCtx) < 0)
        {
            cerr << "Error while queueing EOS buffer on converter output" << endl;
        }
    }
}

/**
 * Reset a context to its defaults: zero it, allocate the free-buffer queue,
 * initialise the mutex/condition, select H.264 input and I420 output, and
 * open the per-channel output file.
 *
 * @param pCtx  context to initialise (everything in it is overwritten).
 */
void CudaDecode::set_defaults(context_t *pCtx)
{
    memset(pCtx, 0, sizeof(context_t));

    // out_pixfmt should be 1 (NV12) or 2 (I420)
    pCtx->out_pixfmt = 2;
    pCtx->conv_output_plane_buf_queue = new queue < NvBuffer * >;

    pthread_mutex_init(&pCtx->queue_lock, NULL);
    pthread_cond_init(&pCtx->queue_cond, NULL);

    pCtx->decoder_pixfmt = V4L2_PIX_FMT_H264;
    //pCtx->decoder_pixfmt = V4L2_PIX_FMT_H265;

    log_level = LOG_LEVEL_INFO;
    //log_level = LOG_LEVEL_ERROR;
    //log_level = LOG_LEVEL_WARN;
    //log_level = LOG_LEVEL_DEBUG;

    char tmpBuf[1024];
    snprintf(tmpBuf, sizeof(tmpBuf),
             "/disk1/nfs/tx1_project/data/test_cuda_%d.yuv", m_iChan);

    // Released in Close().
    pCtx->out_file = new ofstream(tmpBuf);
    pCtx->iOutFrameCount = 0;
}

/**
 * Feed one chunk of encoded bitstream to the decoder.
 *
 * The first calls use the output-plane buffers in index order until each has
 * been queued once; later calls block until the decoder returns a buffer.
 *
 * @param pFrame      encoded bytes to submit.
 * @param iFrameSize  number of bytes in pFrame; must be within
 *                    [0, CHUNK_SIZE], the buffer size requested in Open().
 * @return TRUE when the chunk was queued, FALSE on invalid input, when the
 *         decoder is not open or in error, or when (de)queueing fails.
 */
BOOL CudaDecode::DecodeFrame(unsigned char* pFrame, int iFrameSize)
{
    int ret = 0;
    int iNumBuf = 0;
    struct v4l2_buffer v4l2_buf;
    struct v4l2_plane planes[MAX_PLANES];
    NvBuffer *buffer = NULL;

    memset(&v4l2_buf, 0, sizeof(v4l2_buf));
    memset(planes, 0, sizeof(planes));
    v4l2_buf.m.planes = planes;

    if (m_ctx.dec == NULL)
    {
        printf("DecodeFrame decoder is not open!\n");
        return FALSE;
    }

    if (m_ctx.got_error)
    {
        printf("DecodeFrame m_ctx.got_error!\n");
        return FALSE;
    }

    if (m_ctx.dec->isInError())
    {
        printf("DecodeFrame m_ctx.dec->isInError!\n");
        return FALSE;
    }

    // Validate before taking a buffer so a rejected call does not consume one
    // and an oversized chunk cannot overrun the CHUNK_SIZE-byte buffer.
    if (iFrameSize < 0 || iFrameSize > CHUNK_SIZE
        || (iFrameSize > 0 && pFrame == NULL))
    {
        printf("DecodeFrame invalid input, iFrameSize:%d\n", iFrameSize);
        return FALSE;
    }

    if (!m_bInitBufComplete)
    {
        // Priming phase: hand out each output-plane buffer once, in order.
        iNumBuf = m_ctx.dec->output_plane.getNumBuffers();
        if (m_iInitBufCount < iNumBuf)
        {
            buffer = m_ctx.dec->output_plane.getNthBuffer(m_iInitBufCount);
            v4l2_buf.index = m_iInitBufCount;
            m_iInitBufCount++;
        }

        if (m_iInitBufCount == iNumBuf)
        {
            m_bInitBufComplete = TRUE;
        }
    }
    else
    {
        // Steady state: wait for the decoder to give a buffer back.
        ret = m_ctx.dec->output_plane.dqBuffer(v4l2_buf, &buffer, NULL, -1);
        if (ret < 0)
        {
            printf("Error DQing buffer at output plane\n");
            abort(&m_ctx);
            return FALSE;
        }
    }

    if (buffer == NULL)
    {
        printf("DecodeFrame no output plane buffer available\n");
        return FALSE;
    }

    read_decoder_input_chunk(buffer, pFrame, iFrameSize);
    //printf("after read_decoder_input_chunk iFrameSize:%d\n", iFrameSize);

    v4l2_buf.m.planes[0].bytesused = buffer->planes[0].bytesused;

    ret = m_ctx.dec->output_plane.qBuffer(v4l2_buf, NULL);
    if (ret < 0)
    {
        printf("Error Qing buffer at output plane\n");
        abort(&m_ctx);
        return FALSE;
    }

    return TRUE;
}

/**
 * Placeholder: not implemented. Always returns TRUE and leaves both
 * arguments untouched.
 */
BOOL CudaDecode::GetYuv420Data(char* pBufOutput, int &nBytesOutput)
{
    return TRUE;
}

/** Smoke test: open and immediately close a decoder instance. */
void CudaDecode::CudaDecodeTest()
{
    CudaDecode decode;
    decode.Open();
    decode.Close();
}