Thanks. How can I get performance similar to the 04-rescale sample code if I need to wrap an NvBuffer fd in a VPIImage and pass it to vpiSubmitRescale()? Below is my test code. Thanks.
int main()
{
int in_dmabuf_fd;
int out_dmabuf_fd;
NvBufferCreateParams input_params;
input_params.width = 1920;
input_params.height = 1080;
input_params.layout = NvBufferLayout_Pitch;
input_params.payloadType = NvBufferPayload_SurfArray;
input_params.colorFormat = NvBufferColorFormat_NV12;
input_params.nvbuf_tag = NvBufferTag_VIDEO_CONVERT;
NvBufferCreateEx(&in_dmabuf_fd, &input_params);
NvBufferCreateParams output_params;
output_params.width = 640;
output_params.height = 480;
output_params.layout = NvBufferLayout_Pitch;
output_params.payloadType = NvBufferPayload_SurfArray;
output_params.colorFormat = NvBufferColorFormat_NV12;
output_params.nvbuf_tag = NvBufferTag_VIDEO_CONVERT;
NvBufferCreateEx(&out_dmabuf_fd, &output_params);
NvBufferParams param1;
NvBufferGetParams(in_dmabuf_fd, ¶m1);
void *inputY = nullptr;
NvBufferMemMap(in_dmabuf_fd, 0, NvBufferMem_Write, &inputY);
void *inputUV = nullptr;
NvBufferMemMap(in_dmabuf_fd, 1, NvBufferMem_Write, &inputUV);
NvBufferMemSyncForCpu(in_dmabuf_fd, 0, &inputY);
NvBufferMemSyncForCpu(in_dmabuf_fd, 1, &inputUV);
VPIImageData inImgData;
memset(&inImgData, 0, sizeof(inImgData));
inImgData.type = VPI_IMAGE_FORMAT_NV12;
inImgData.numPlanes = 2;
inImgData.planes[0].width = param1.width[0];
inImgData.planes[0].height = param1.height[0];
inImgData.planes[0].pitchBytes = param1.pitch[0];
inImgData.planes[0].data = inputY;
inImgData.planes[1].width = param1.width[1];
inImgData.planes[1].height = param1.height[1];
inImgData.planes[1].pitchBytes = param1.pitch[1];
inImgData.planes[1].data = inputUV;
NvBufferParams param2;
NvBufferGetParams(out_dmabuf_fd, ¶m2);
void *outputY = nullptr;
NvBufferMemMap(out_dmabuf_fd, 0, NvBufferMem_Write, &outputY);
void *outputUV = nullptr;
NvBufferMemMap(out_dmabuf_fd, 1, NvBufferMem_Write, &outputUV);
NvBufferMemSyncForCpu(out_dmabuf_fd, 0, &outputY);
NvBufferMemSyncForCpu(out_dmabuf_fd, 1, &outputUV);
VPIImageData outImgData;
memset(&outImgData, 0, sizeof(outImgData));
outImgData.type = VPI_IMAGE_FORMAT_NV12;
outImgData.numPlanes = 2;
outImgData.planes[0].width = param2.width[0];
outImgData.planes[0].height = param2.height[0];
outImgData.planes[0].pitchBytes = param2.pitch[0];
outImgData.planes[0].data = outputY;
outImgData.planes[1].width = param2.width[1];
outImgData.planes[1].height = param2.height[1];
outImgData.planes[1].pitchBytes = param2.pitch[1];
outImgData.planes[1].data = outputUV;
VPIEvent evStop = nullptr;
VPIEvent evStart = nullptr;
float elapsedMS;
VPIStream stream;
VPIBackend backendType = VPI_BACKEND_CUDA;
vpiStreamCreate(backendType, &stream);
VPIImage input = nullptr;
VPIImage output = nullptr;
vpiImageCreateHostMemWrapper(&inImgData, 0, &input);
vpiImageCreateHostMemWrapper(&outImgData, 0, &output);
vpiEventCreate(0, &evStart);
vpiEventCreate(0, &evStop);
vpiEventRecord(evStart, stream);
for (int j = 0; j < 30000; j++) {
vpiSubmitRescale(stream, VPI_BACKEND_CUDA, input, output, VPI_INTERP_LINEAR, VPI_BOUNDARY_COND_ZERO);
}
vpiEventRecord(evStop, stream);
vpiEventSync(evStop);
vpiEventElapsedTime(evStart, evStop, &elapsedMS);
std::cout << elapsedMS / 30000 << "ms per frame." << std::endl;
// Clean up
NvBufferDestroy(in_dmabuf_fd);
NvBufferDestroy(out_dmabuf_fd);
vpiImageDestroy(input);
vpiImageDestroy(output);
vpiStreamDestroy(stream);
vpiEventDestroy(evStop);
vpiEventDestroy(evStart);
return 0;
}