Hello
I built a pipeline in C++ that is designed for the lowest possible latency. I capture directly into a DMABUF buffer pool via V4L2. The capture format is UYVY. I capture the left and right halves of a 3840x2160@60 stream via two separate video devices. When the left and right buffers are ready, I use CUDA to convert them to NV12 and stitch the two halves of the image together, writing the result directly into a dequeued buffer of the encoder, so the whole path is zero-copy. This all works very well. The frame is then encoded, and I measure the latency after the dequeue on the capture plane of the encoder. However, I find that the encoder latency for H.265 encoding is still quite high. I get the following latencies on average:
Start of Frame to Capture right time Δ avg: 16.362506 ms
Start of Frame to Capture left time Δ avg: 16.366713 ms
Start of Frame to Stitch/Conversion time Δ avg: 19.269282 ms
Start of Frame to dequeue on Capture plane of Enc time Δ avg: 42.263388 ms
This means that NVENC takes approximately 23 ms (42.26 ms - 19.27 ms) for a 4K frame at 60 fps. I would like to reduce this value. I read my encoder parameters from a YAML file; my current parameters are attached below.
I also wonder why NVENC is not running at its maximum possible clock frequency. In my application, it clocks between 500 and 560 MHz even though I have max_perf enabled, whereas the maximum NVENC clock frequency is 793 MHz. I have run jetson_clocks and my Jetson is running in MAXN_SUPER mode. My L4T version is 36.4.3 and my JetPack version is 6.2.1.
How can I reduce my latency?
# Encoder parameter file. Each key is preceded by a comment giving its
# meaning, its default and (where known) its allowed values.
#
# --- Codec selection, reference structure and external rate-control hints ---
# Encoding format [Default = "H265"]
# Allowed: ["H264", "H265", "VP8", "VP9", "AV1"]
encoder_pixfmt: "H265"
# Enable external RPS [Default = false]
externalRPS: false
# Enable RPS three-layer SVC [Default = false]
RPS_threeLayerSvc: false
# Enable maximum performance [Default = false]
max_perf: true
# Enable GDR [Default = false]
enableGDR: false
# Allow gaps in FrameNum [Default = false]
bGapsInFrameNumAllowed: false
# H264 FrameNum bits [Default = 0]
nH264FrameNumBits: 0
# H265 POC LSB bits [Default = 0]
nH265PocLsbBits: 0
# No I-frames [Default = false]
bnoIframe: false
# Enable external picture RC [Default = false]
externalRCHints: false
# Max QP per session when external picture RC is enabled (needs to be >0)
# NOTE(review): externalRCHints is false above, so this value is presumably
# unused in this configuration — confirm against the loader.
sMaxQp: 51
# --- Bitrate, GOP structure and bitstream headers ---
# Bitrate [Default = 4194304] (needs to be >0)
# NOTE(review): unit is presumably bits per second — confirm.
bitrate: 6250000
# Peak bitrate [Default = 1.2*bitrate] (needs to be >0)
# 7500000 is exactly 1.2 * 6250000, i.e. the documented default ratio.
peak_bitrate: 7500000
# I-frame interval, in frames [Default = 30]
iframe_interval: 30
# IDR interval, in frames [Default = 256]
idr_interval: 256
# Insert SPS/PPS at every IDR [Default = false]
insert_sps_pps_at_idr: true
# Slice-level encode output [Default = false]
# NOTE(review): with this off, output is presumably only delivered once a whole
# frame has been encoded. Enabling it together with a non-zero slice_length is a
# candidate for lowering the dequeue latency — verify against the encoder docs.
enable_slice_level_encode: false
# Disable CABAC for H264 [Default = false]
disable_cabac: false
# Insert VUI [Default = false]
insert_vui: false
# Set extended color format (only works with insert_vui) [Default = false]
enable_extended_colorformat: false
# Insert AUD [Default = false]
insert_aud: false
# Enable all-I-frame encoding [Default = false]
alliframes: false
# --- Level, rate-control mode and profile ---
# Encoding level [Default set by the library] (only for H264 and H265)
# H264: ["1.0", "1b", "1.1", "1.2", "1.3", "2.0", "2.1", "2.2", "3.0", "3.1",
#        "3.2", "4.0", "4.1", "4.2", "5.0", "5.1"]
# H265: ["main1.0", "high1.0", "main2.0", "high2.0", "main2.1", "high2.1",
#        "main3.0", "high3.0", "main3.1", "high3.1", "main4.0", "high4.0",
#        "main4.1", "high4.1", "main5.0", "high5.0", "main5.1", "high5.1",
#        "main5.2", "high5.2", "main6.0", "high6.0", "main6.1", "high6.1",
#        "main6.2", "high6.2"]
# NOTE(review): "main6.2" is the highest main-tier entry in the list above;
# check the H.265 level limits to see whether a lower level already covers the
# stream being encoded.
level: "main6.2"
# Rate-control mode [Default = cbr]
# Allowed: ["cbr", "vbr"]
ratecontrol: "cbr"
# Enable lossless encoding [Default = false] (only applicable with YUV444 input)
enableLossless: false
# Disable Asymmetric Motion Partition. Applicable only for H.265 on Xavier
# [Default = false]
disable_amp: false
# Enable use of gold CRC [Default = false]
use_gold_crc: false
# Gold CRC [String]
gold_crc: ""
# Encoding profile [Default = baseline]
# H264: ["baseline", "constrained-baseline", "main", "high", "constrained-high",
#        "high444"]
# H265: ["main", "main10"]
# NOTE(review): the documented default "baseline" only appears in the H264
# list; the default used for H265 is not stated here.
profile: "main"
# --- Frame dropping, slicing, hardware preset and buffering ---
# Temporal tradeoff level [Default = 0]
# Allowed: [0, 1, 2, 3, 4]
# 0: Do not drop any buffers
# 1: Drop 1 in every 5 buffers
# 2: Drop 1 in every 3 buffers
# 3: Drop 1 in every 2 buffers
# 4: Drop 2 in every 3 buffers
temporal_tradeoff_level: 0
# Slice length type [Default = 1]
# Allowed: [1, 2]
# 1: Number of MBs
# 2: Bytes
slice_length_type: 1
# HW preset type (no default is documented here)
# 0: Disable
# 1: ultrafast
# 2: fast
# 3: medium
# 4: slow
hw_preset_type: 1
# Slice length [Default = 0], interpreted according to slice_length_type
# 0: Default
# >0: Slice length
slice_length: 0
# Virtual buffer size (VBV) [Default = 0]
# 0: Default
# >0: VBV size
# NOTE(review): 1560000 is about a quarter of the configured bitrate (6250000);
# if both are in bits, that is roughly 0.25 s of buffering. Confirm the unit and
# whether this affects the measured latency.
virtual_buffer_size: 1560000
# Number of B frames [Default = 4294967295, which is -1 as unsigned 32-bit]
# -1: Not set
# >=0: Number of B frames
# NOTE(review): -1 leaves the choice to the encoder; an explicit 0 would
# presumably rule out B-frame reordering delay — confirm.
num_b_frames: -1
# --- Colorspace, intra refresh and QP limits ---
# Colorspace [Default = 1]
# 0: Default colorspace, i.e. let the driver figure it out
# 1: SMPTE 170M: used for broadcast NTSC/PAL SDTV
# 2: Obsolete pre-1998 SMPTE 240M HDTV standard, superseded by Rec.709
# 3: Rec.709: used for HDTV
color_space: 1
# Slice intra-refresh interval [Default = 0]
# 0: Default
# >0: Slice intra-refresh interval
slice_intrarefresh_interval: 0
# Minimum/maximum QP per frame type. The original comment documented a default
# of 4294967295 for the I-frame minimum only.
# NOTE(review): presumably the same default applies to all six keys — confirm.
# -1: Not set
# >=0 && <=51: minimum/maximum QP value
nMinQpI: -1 # I-frames, minimum QP
nMaxQpI: -1 # I-frames, maximum QP
nMinQpP: -1 # P-frames, minimum QP
nMaxQpP: -1 # P-frames, maximum QP
nMinQpB: -1 # B-frames, minimum QP
nMaxQpB: -1 # B-frames, maximum QP
# --- Reconstructed-surface CRC, sample aspect ratio and diagnostics ---
# Reconstructed surface CRC [Default = false]
bReconCrc: false
# Reconstructed surface left coordinate [Default = 0]
# (only considered if bReconCrc == true)
rl: 0
# Reconstructed surface top coordinate [Default = 0]
# (only considered if bReconCrc == true)
rt: 0
# Reconstructed surface width [Default = 5]
# (only considered if bReconCrc == true)
# Has to be >0
rw: 5
# Reconstructed surface height [Default = 5]
# (only considered if bReconCrc == true)
# Has to be >0
rh: 5
# Sample aspect ratio (SAR) parameters [Default = 0 0]
# 0: Default SAR
# >0: SAR value
sar_width: 0
sar_height: 0
# Print encoder output metadata [Default = false]
report_metadata: false
# Enable encoder input metadata [Default = false]
input_metadata: false
# Dump encoded motion vectors [Default = false]
dump_mv: false
# --- Rate control switch and initial QP ---
# Disable constant QP, enable rate control [Default = true]
enable_ratecontrol: true
# Enable init QP values for I, P and B frames
enable_initQP: false
# Init QP values [Default = 0] (has to be >=0)
# (only considered if enable_ratecontrol == true)
# NOTE(review): the condition above is copied from the original comment; given
# the separate enable_initQP switch, it presumably also (or instead) depends on
# enable_initQP == true — confirm against the loader.
IinitQP: 0
PinitQP: 0
BinitQP: 0
# --- Preprocessing enhancements (PPE) ---
# Every key after enable_ppe in this section is documented as only being
# considered when enable_ppe == true; enable_ppe is false here.
# Enable preprocessing enhancements (PPE) [Default = false]
enable_ppe: false
# PPE wait time for the encoder, in milliseconds [Default = infinite wait]
# (only considered if enable_ppe == true)
# -1: Wait infinitely
# >=0 && <=90: max number of milliseconds the encoder should wait for each
#              frame's processing
wait_time_ms: -1
# Enable profiler for PPE [Default = false]
# (only considered if enable_ppe == true)
enable_profiler: false
# Enable PPE temporal AQ feature [Default = false]
# (only considered if enable_ppe == true)
# Set to true here, but per the condition above it is not considered while
# enable_ppe is false.
ppe_taq: true
# Enable PPE spatial AQ feature [Default = false]
# (only considered if enable_ppe == true)
ppe_saq: false
# Max QP delta strength for TAQ and SAQ [Default = 5]
# (only considered if enable_ppe == true)
# Range: >=1 && <=10
taq_max_qp_delta: 5
saq_max_qp_delta: 5
# Enable B-frame support for PPE TAQ [Default = true]
# (only considered if enable_ppe == true)
taq_b_frame_mode: true
# --- AV1-specific settings ---
# AV1 specific. Enable multi-tile encoding. [Default = false]
# (only considered if encoder_pixfmt == "AV1")
enable_av1tile: false
# AV1 specific. Log2 of the number of tile rows and columns
# Values need to be >=0
log2_num_av1rows: 0
log2_num_av1cols: 0
# AV1 specific. Enable and set SSIM RDO [Default = -1, set by encoder]
# -1: Disabled, set by encoder
# >=0: Enable SSIM RDO with this value
enable_av1ssimrdo: -1
# Enable and set CDF update [Default = -1, set by encoder]
# -1: Disabled, set by encoder
# >=0: Enable and set CDF update with this value
# NOTE(review): the key is named "disable_..." while this comment says
# "Enable"; the polarity of values >=0 should be confirmed. The "av1" in the key
# name suggests this is AV1 specific like its neighbours, although the original
# comment does not say so.
disable_av1cdfupdate: -1
# AV1 specific. Error resilient mode [Default = true, set by encoder]
# ATTENTION: true: Disables control, set by encoder
#            false: Enables control
av1erresmode: true
# AV1 specific. Enable frameID present flag. [Default = false]
enable_av1frameidflag: false
# --- Picture order count and two-pass CBR ---
# Picture Order Count (POC) type [Default = 0]
# The original note says the value has to be >0, yet both the documented
# default and the configured value are 0.
# NOTE(review): presumably ">=0" was meant — confirm against the loader.
poc_type: 0
# Enable two-pass CBR for H264/H265 during encode [Default = false]
enable_two_pass_cbr: false