NVENC-encoded video plays in slow motion

Environment

  • GPU: NVIDIA RTX 5070 Ti
  • Driver Version: 572.83
  • Operating System: Windows 11
  • NVENC API: Using nvEncodeAPI64.h from Video Codec SDK 13.0
  • Encoding Format: H.264 High Profile
  • Target Frame Rate: 60 FPS

I’m building a screen recording application with NVENC. The application captures and encodes frames successfully, but the resulting video plays in slow motion even though the frame rate is set to 60 FPS throughout the pipeline.

When I examine the output video with ffprobe, it shows this:

> ffprobe  .\test_capture.h264        
Input #0, h264, from 'test_capture.h264':
  Duration: N/A, bitrate: N/A
  Stream #0:0: Video: h264 (High), yuv420p(progressive), 3840x2160 [SAR 1:1 DAR 16:9], 25 fps, 60 tbr, 1200k tbn

However, the video plays in slow motion, as if the frame timestamps are not being respected during playback or the frame rate itself is wrong (I’m not sure which).
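If the stream really is being interpreted at the 25 fps that ffprobe reports rather than the intended 60 FPS, playback would run 60 / 25 = 2.4× slower than real time.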

NVENC Initialization

void NvEncContext::Initiate(Microsoft::WRL::ComPtr<ID3D11Device> d3ddev, unsigned w, unsigned h, int targetFps, int frames) {
    width = w; height = h;
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS ses = { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER };
    ses.device = d3ddev.Get();
    ses.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
    NVCHK(g_nvenc.nvEncOpenEncodeSessionEx(&ses, &session));

    NV_ENC_PRESET_CONFIG preset = { NV_ENC_PRESET_CONFIG_VER };
    preset.presetCfg.version = NV_ENC_CONFIG_VER;
    NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
    NVCHK(g_nvenc.nvEncGetEncodePresetConfigEx(session, NV_ENC_CODEC_H264_GUID, NV_ENC_PRESET_P4_GUID, tuningInfo, &preset));
    NV_ENC_CONFIG encconf = preset.presetCfg;

    encconf.gopLength = targetFps;   // one IDR roughly every second at the target frame rate
    encconf.frameIntervalP = 1;      // IPP only, no B-frames
    
    encconf.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
    encconf.rcParams.averageBitRate = 8000000; // 8 Mbps
    encconf.rcParams.maxBitRate = 12000000; // 12 Mbps
    encconf.rcParams.vbvBufferSize = 16000000;   // VBV (HRD) buffer size, in bits
    encconf.rcParams.vbvInitialDelay = 8000000;  // VBV initial delay, in bits

    encconf.version = NV_ENC_CONFIG_VER;
    encconf.rcParams.version = NV_ENC_RC_PARAMS_VER;

    NV_ENC_INITIALIZE_PARAMS init = {0};
    init.version = NV_ENC_INITIALIZE_PARAMS_VER;
    init.encodeConfig = &encconf;
    init.encodeGUID = NV_ENC_CODEC_H264_GUID;
    init.presetGUID = NV_ENC_PRESET_P4_GUID;
    init.encodeWidth = width; init.encodeHeight = height;
    init.darWidth = width; init.darHeight = height;
    init.frameRateNum = targetFps; init.frameRateDen = 1; // it's set here right?
    init.enablePTD = 1;
    init.bufferFormat = fmt;
    init.tuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
    NVCHK(g_nvenc.nvEncInitializeEncoder(session, &init));

    for (unsigned i = 0; i < frames; ++i) {
        NV_ENC_CREATE_BITSTREAM_BUFFER bb = { NV_ENC_CREATE_BITSTREAM_BUFFER_VER };
        NVCHK(g_nvenc.nvEncCreateBitstreamBuffer(session, &bb));
        bitstreams.push_back(bb.bitstreamBuffer);
    }
}
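
In case it is relevant, here is a minimal sketch (untested, written from my reading of nvEncodeAPI.h) of how I believe the SPS/PPS the encoder emits could be dumped with nvEncGetSequenceParams after initialization, so the SPS can be checked for VUI timing info at all. It assumes the same g_nvenc function table and NVCHK macro as above, plus <vector>, <cstdint> and <iostream>:

// Sketch only: dump the SPS/PPS the encoder would emit, so the SPS can be
// inspected for VUI timing fields (num_units_in_tick / time_scale).
void DumpSequenceParams(void* session) {
    std::vector<uint8_t> buf(512);
    uint32_t payloadSize = 0;

    NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER };
    payload.inBufferSize = static_cast<uint32_t>(buf.size());
    payload.spsppsBuffer = buf.data();
    payload.outSPSPPSPayloadSize = &payloadSize;

    NVCHK(g_nvenc.nvEncGetSequenceParams(session, &payload));
    std::cout << "SPS/PPS payload: " << payloadSize << " bytes" << std::endl;
}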

Frame Encoding

void VideoEncoder::EncodeFrame(NvEncContext& nvenc, ComPtr<ID3D11Texture2D> tex, std::ofstream& out, int frameIdx, 
                              const std::chrono::high_resolution_clock::time_point& timestamp) {
    // Log frame encoding with timestamp information
    auto now = std::chrono::high_resolution_clock::now();
    auto frameTime = std::chrono::duration_cast<std::chrono::milliseconds>(timestamp.time_since_epoch()).count();
    auto currentTime = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
    
    std::cout << "Encoding frame " << frameIdx << " with timestamp: " << frameTime 
              << " ms (delta from now: " << (currentTime - frameTime) << " ms)" << std::endl;
    
    NV_ENC_REGISTERED_PTR regptr = RegisterSurfaceOnce(nvenc.session, tex.Get(), nvenc.fmt);

    NV_ENC_MAP_INPUT_RESOURCE map = {0};
    map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
    map.registeredResource = regptr;
    NVCHK(g_nvenc.nvEncMapInputResource(nvenc.session, &map));
    
    NV_ENC_PIC_PARAMS pic = { NV_ENC_PIC_PARAMS_VER };
    pic.inputBuffer = map.mappedResource;
    pic.outputBitstream = nvenc.bitstreams[frameIdx % nvenc.bitstreams.size()];
    pic.bufferFmt = nvenc.fmt;
    pic.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
    pic.inputWidth = nvenc.width; pic.inputHeight = nvenc.height;
    pic.completionEvent = nullptr;
    pic.encodePicFlags = 0;
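    // note: pic.inputTimeStamp and pic.inputDuration are left at their zero-initialized defaults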
    
    NVENCSTATUS encStatus = g_nvenc.nvEncEncodePicture(nvenc.session, &pic);
    if (encStatus != NV_ENC_SUCCESS) {
        std::cerr << "nvEncEncodePicture failed with code " << encStatus 
                << " (" << nvEncStatusStr(encStatus) << ")\n";
        throw std::runtime_error("nvEncEncodePicture");
    }
    
    NV_ENC_LOCK_BITSTREAM lock = { NV_ENC_LOCK_BITSTREAM_VER };
    lock.outputBitstream = pic.outputBitstream;
    lock.doNotWait = false;
    NVCHK(g_nvenc.nvEncLockBitstream(nvenc.session, &lock));
    if (lock.bitstreamSizeInBytes > 0)
        out.write((const char*)lock.bitstreamBufferPtr, lock.bitstreamSizeInBytes);
    NVCHK(g_nvenc.nvEncUnlockBitstream(nvenc.session, pic.outputBitstream));
    NVCHK(g_nvenc.nvEncUnmapInputResource(nvenc.session, map.mappedResource));
}

bool VideoEncoder::encodeFrames(const std::vector<Frame>& frames, const std::string& outputPath) {
    if (!initialized) {
        std::cerr << "Encoder not initialized" << std::endl;
        return false;
    }
    
    try {
        std::ofstream out(outputPath, std::ios::binary);
        if (!out) {
            std::cerr << "Failed to open output file: " << outputPath << std::endl;
            return false;
        }
        
        for (size_t i = 0; i < frames.size(); ++i) {
            EncodeFrame(nvenc, frames[i].getTexture(), out, i, frames[i].getTimestamp());
        }
        
        NV_ENC_PIC_PARAMS eof = {NV_ENC_PIC_PARAMS_VER}; 
        eof.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
        NVCHK(g_nvenc.nvEncEncodePicture(nvenc.session, &eof));
        
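        // drain: after signalling EOS, lock each bitstream buffer once more for any remaining output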
        for (unsigned i = 0; i < nvenc.bitstreams.size(); ++i) {
            NV_ENC_LOCK_BITSTREAM lock = { NV_ENC_LOCK_BITSTREAM_VER };
            lock.outputBitstream = nvenc.bitstreams[i];
            lock.doNotWait = false;
            auto e = g_nvenc.nvEncLockBitstream(nvenc.session, &lock);
            if (e == NV_ENC_SUCCESS && lock.bitstreamSizeInBytes)
                out.write((const char*)lock.bitstreamBufferPtr, lock.bitstreamSizeInBytes);
            if (e == NV_ENC_SUCCESS)
                g_nvenc.nvEncUnlockBitstream(nvenc.session, lock.outputBitstream);
        }
        
        out.close();
        return true;
    }
    catch (std::exception& e) {
        std::cerr << "Encoding failed: " << e.what() << std::endl;
        return false;
    }
}

Frame Capture Loop

void ScreenRecorder::captureLoop() {
    // frame time in microseconds
    const long long frameTimeUs = 1000000 / fps;
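    // at fps = 60 this works out to 1000000 / 60 = 16666 us per frame (integer division)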
    auto lastFrameTime = std::chrono::high_resolution_clock::now();
    
    while (running) {
        auto targetTime = lastFrameTime + std::chrono::microseconds(frameTimeUs);
        auto now = std::chrono::high_resolution_clock::now();
        
        if (now < targetTime) {
            auto diff = targetTime - now;
            auto waitTimeUs = std::chrono::duration_cast<std::chrono::microseconds>(diff).count();
            
            if (waitTimeUs > 2000) {
                // sleep when the remaining wait is long enough to be worth it
                std::this_thread::sleep_for(std::chrono::microseconds(waitTimeUs));
            }

            // yield until the target time is reached
            while (std::chrono::high_resolution_clock::now() < targetTime) {
                std::this_thread::yield();
            }
        }
        
        Frame* frame = capturer.captureFrame();
        if (frame) {
            frameBuffer.addFrame(*frame);
            delete frame;
        }
        
        auto currentTime = std::chrono::high_resolution_clock::now();
        
        auto diff = currentTime - targetTime;
        auto behindUs = std::chrono::duration_cast<std::chrono::microseconds>(diff).count();
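        // behindUs > 0 means this iteration finished after its scheduled frame slot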
        
        if (behindUs > 0) {
            if (behindUs > frameTimeUs) {
                lastFrameTime = currentTime;
            } else {
                long long catchUpUs = (behindUs < frameTimeUs / 4) ? behindUs : frameTimeUs / 4;
                lastFrameTime = currentTime - std::chrono::microseconds(catchUpUs);
            }
        } else {
            lastFrameTime = currentTime;
        }
    }
}

I thought the raw H.264 elementary stream might simply not carry a frame rate, so I remuxed it into an MP4 container and set -r 60 in the hope of correcting the playback speed, but it made no difference:

ffmpeg -i "test_capture.h264" -c:v copy -r 60 -f mp4 capture.mp4
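
If it turns out I need to mux with explicit per-frame timestamps instead of relying on -r, I assume the conversion from my capture timestamps would look roughly like this (hypothetical helper, not something that exists in my code yet; 90 kHz is just the usual MPEG timescale):

#include <chrono>
#include <cstdint>

// Hypothetical: turn a capture timestamp into a 90 kHz PTS relative to the
// first captured frame. Not wired into the encoder or any muxer yet.
int64_t ToPts90kHz(std::chrono::high_resolution_clock::time_point t,
                   std::chrono::high_resolution_clock::time_point firstFrame) {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(t - firstFrame).count();
    return us * 90000 / 1000000;  // microseconds -> 90 kHz ticks
}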

Troubleshooting Steps Taken

  1. Confirmed that the frame rate is set to 60 FPS in the NVENC initialization parameters
  2. Tried different media players (VLC, MPV, etc.) - all show slow motion playback

Questions

  1. Is there a specific NVENC parameter or flag I’m missing that would ensure proper playback speed?
  2. Am I passing incorrect information to the encoder?
  3. Could the issue be related to the way I’m writing the encoded bitstream to the output file?

Any insights or suggestions would be greatly appreciated. Thank you!