On TX2, NvBufferTransform takes a different amount of time on different JetPack versions

First, I read a 1920x1080 JPEG, decode it with decodeToFd, and then use NvBufferTransform to convert it to NvBufferColorFormat_YUV420.

On R32.5.0, NvBufferTransform from 1920x1080 YUV420 to 1920x1080 YUV420 costs 8 ms.
On R32.2.3, NvBufferTransform from 1920x1080 YUV420 to 1920x1080 YUV420 costs 3 ms.
Both 3 ms and 8 ms are too slow. How can I make the transform faster?
This is just a copy and transform; how can I use NvBufferTransform to get good performance?

This is my test code:
/*
 * Creates NUM_BUFFS pitch-layout YUV420 buffers of OSD_WIDTH x OSD_HEIGHT and
 * stores their descriptors in buf_fd[].
 *
 * Returns 0 when every buffer was created, or -1 as soon as one
 * NvBufferCreateEx() call fails.
 *
 * NOTE(review): on failure, buffers created in earlier iterations are not
 * released by this function; the caller has to clean them up.
 */
int CreateBufFD()
{
    NvBufferCreateParams input_params = {0};

    input_params.payloadType = NvBufferPayload_SurfArray;
    input_params.width = OSD_WIDTH;
    input_params.height = OSD_HEIGHT;
    input_params.layout = NvBufferLayout_Pitch;
    input_params.nvbuf_tag = NvBufferTag_VIDEO_CONVERT;
    input_params.colorFormat = NvBufferColorFormat_YUV420;

    for (int i = 0; i < NUM_BUFFS; i++) {
        if (NvBufferCreateEx(&buf_fd[i], &input_params) == -1) {
            /* Plain ASCII quotes: the typographic quotes in the original
             * paste are not valid string delimiters. */
            plog(m_nChannel, "Failed to create DrmRender tmp NvBuffer");
            return -1;
        }
        plog(m_nChannel, "###create DrmRender tmp NvBuffer[%d-%d]", i, buf_fd[i]);
    }

    return 0;
}

/*
 * Runs one NvBufferTransform() from the buffer behind srcfd into
 * buf_fd[index], using a full-frame OSD_WIDTH x OSD_HEIGHT rectangle on both
 * sides, no flip, and the "smart" filter.
 *
 * srcfd: descriptor of the source buffer.
 * index: slot in buf_fd[] to write into; not range-checked here.
 *
 * Returns true on success, false (after logging) when the transform fails.
 */
bool Transform(int srcfd, int index)
{
    NvBufferTransformParams params = {0};

    /* Source and destination both cover the whole frame. */
    params.src_rect.left = 0;
    params.src_rect.top = 0;
    params.src_rect.width = OSD_WIDTH;
    params.src_rect.height = OSD_HEIGHT;

    params.dst_rect.left = 0;
    params.dst_rect.top = 0;
    params.dst_rect.width = OSD_WIDTH;
    params.dst_rect.height = OSD_HEIGHT;

    /* NOTE(review): only NVBUFFER_TRANSFORM_FILTER is set in transform_flag;
     * confirm whether the rectangles above are honored without crop flags. */
    params.transform_filter = NvBufferTransform_Filter_Smart;
    params.transform_flip = NvBufferTransform_None;
    params.transform_flag = NVBUFFER_TRANSFORM_FILTER;

    const int rc = NvBufferTransform(srcfd, buf_fd[index], &params);
    if (rc >= 0) {
        return true;
    }

    /* NOTE(review): the message names UYVY/ARGB32, while CreateBufFD()
     * allocates the destination buffers as YUV420. */
    plog(m_nChannel, "NvBufferTransform() UYVY --> ARGB32 fail!");
    return false;
}

/*
 * Benchmark driver: prepares the buffers, then once per second times a single
 * Transform() call and logs the average cost after every fifth call.
 *
 * Returns 1 when either setup step fails. The measurement loop never
 * terminates, so the normal exit path below it is not reached.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    GMainLoop *main_loop = g_main_loop_new(NULL, FALSE);
    int count = 0;
    struct timeval tv1, tv2;
    /* NOTE(review): if get_time_diff_ms() returns fractional milliseconds,
     * accumulating into an integer drops the fraction — confirm. */
    uint32_t diff = 0;

    /* NOTE(review): createfd() presumably sets up m_nNoSignalFD — confirm. */
    if (!createfd()) {
        plog(0, "Enter CreateFD failed");
        return 1; /* report the failure to the caller instead of success */
    }
    if (CreateBufFD() != 0) {
        plog(0, "CreateBufFD failed");
        return 1;
    }

    while (1) {
        /* Cycle through the destination buffers. */
        int index = count++ % NUM_BUFFS;

        gettimeofday(&tv1, NULL);
        Transform(m_nNoSignalFD, index);
        gettimeofday(&tv2, NULL);
        diff += get_time_diff_ms(&tv1, &tv2);

        if (count % 5 == 0) {
            plog(0, "cost:%f,count:%d", diff / 5.0, count);
            diff = 0;
        }
        sleep(1);
    }

    /* NOTE(review): unreachable — the loop above has no exit. Kept so the
     * intended shutdown path stays visible. */
    g_main_loop_run(main_loop);

    plog(0, "main() exit! ");
    return 0;
}

Following the topic "Fast copy of DMA buffers via NvBufferTransform",
I ran these steps:
root@nvidia-desktop:~# echo on > /sys/devices/13e10000.host1x/15340000.vic/power/control
root@nvidia-desktop:~# echo userspace > /sys/devices/13e10000.host1x/15340000.vic/devfreq/15340000.vic/governor
root@nvidia-desktop:~# cat /sys/devices/13e10000.host1x/15340000.vic/devfreq/15340000.vic/available_frequencies
115200000 268800000 409600000 550400000 691200000 832000000 972800000 1024000000
root@nvidia-desktop:~# echo 1024000000 > /sys/devices/13e10000.host1x/15340000.vic/devfreq/15340000.vic/max_freq
root@nvidia-desktop:~# echo 1024000000 > /sys/devices/13e10000.host1x/15340000.vic/devfreq/15340000.vic/userspace/set_freq

But the performance is still not good: it only went from 7-8 ms down to 2-3 ms.

Is this the best achievable performance?

Hi,
Setting VIC to max clock should bring maximum throughput. If you have simultaneous NvBufferTransform() calls, please also create NvBufferSession to have better scheduling.

This topic was automatically closed 60 days after the last reply. New replies are no longer allowed.