I am currently trying to compare software- and hardware-based conversion of H.264 → RGB. For that, I implemented an FFmpeg-based video source operator which receives an SRT stream and decodes the H.264 video.
Is it possible to profile the FFmpeg conversion using Nsight Systems? The ffmpeg process does not show any traces in the profile.
Is there a better way to do this? Is it possible to circumvent the host machine and directly receive the frames on the device?
Thanks in advance
class FFmpegSRTStreamSourceOp(Operator):
    """Source operator that reads raw RGB frames from an ``ffmpeg`` child process.

    ``ffmpeg`` is started in :meth:`start` with the input ``url`` and is asked
    to write ``rgb24`` raw video of size ``width`` x ``height`` to its stdout.
    Each :meth:`compute` call reads exactly one frame worth of bytes from that
    pipe, turns it into a ``(height, width, n_channels)`` ``uint8`` CuPy array
    and emits an entity on the ``"source"`` output port.
    """

    def __init__(self, fragment, url, height, width, n_channels, *args, **kwargs):
        """Store the frame geometry and build the ffmpeg command line.

        Args:
            fragment: Fragment forwarded to ``Operator.__init__``.
            url: Input passed to ffmpeg's ``-i`` option.
            height: Frame height in pixels requested from ffmpeg.
            width: Frame width in pixels requested from ffmpeg.
            n_channels: Number of bytes per pixel used to size one frame.
                NOTE(review): the command requests ``rgb24``, so this is
                presumably expected to be 3 -- confirm with callers.
            *args: Extra positional arguments forwarded to ``Operator``.
            **kwargs: Extra keyword arguments forwarded to ``Operator``.
        """
        self.height = height
        self.width = width
        self.n_channels = n_channels
        self.url = url
        # Number of bytes in one frame (one byte per channel sample).
        self.buffer_size = self.width * self.height * self.n_channels
        self.ffmpeg_command = [
            'ffmpeg',
            '-max_delay', '0',
            '-y', '-vsync', '0',
            '-hwaccel_device', '0',
            '-hwaccel', 'cuda',
            '-fflags', 'nobuffer', '-flags', 'low_delay', '-strict', 'experimental',
            '-i', url,
            '-pix_fmt', 'rgb24',
            '-s', f'{width}x{height}',
            '-vf', 'setpts=0',
            '-f', 'rawvideo', 'pipe:',
        ]
        # Handle of the ffmpeg child process; created in start(), released in stop().
        self.p = None
        super().__init__(fragment, *args, **kwargs)

    def setup(self, spec: OperatorSpec):
        """Declare the ``"source"`` output port that :meth:`compute` emits on."""
        spec.output("source")

    def start(self):
        """Launch ffmpeg with its stdout connected to a pipe."""
        # using subprocess and pipe to fetch frame data
        self.p = subprocess.Popen(self.ffmpeg_command, stdout=subprocess.PIPE, bufsize=10**8)

    @nvtx.annotate("compute", color="green")
    def compute(self, op_input, op_output, context):
        """Read one frame from ffmpeg and emit an entity on ``"source"``.

        Nothing is emitted when fewer than ``buffer_size`` bytes could be read
        (for example because ffmpeg has exited or the stream has ended).
        """
        with nvtx.annotate("stdout.read", color="blue"):
            raw_bytes = self.p.stdout.read(self.buffer_size)

        # An incomplete frame cannot be reshaped; skip it before touching CuPy.
        if len(raw_bytes) != self.buffer_size:
            return

        with nvtx.annotate("bytes_to_tensor", color="yellow"):
            tensor = cp.frombuffer(raw_bytes, cp.uint8)
            tensor = tensor.reshape(self.height, self.width, self.n_channels)

        entity = Entity(context)
        # NOTE(review): `tensor` is never attached to `entity`, so downstream
        # operators receive an empty entity. The original presumably added the
        # tensor to the entity before emitting -- confirm the tensor name the
        # downstream operator expects and restore that call.
        op_output.emit(entity, "source")

    def stop(self):
        """Terminate the ffmpeg child process, then run the base-class stop."""
        proc = getattr(self, "p", None)
        if proc is not None:
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    # ffmpeg ignored the polite request; force it down.
                    proc.kill()
                    proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
            self.p = None
        return super().stop()