NVOF visualization not working as expected

Hi, I am using the NVOF API in CUDA to get the flow vectors. I have followed this documentation.
I am using ABGR-format input and reference frames, and I receive the vectors in S10.5 format, which I convert to float and then post-process into a displayable RGB format. For some reason I am not getting the correct visualization: the displayed frame appears split into 4 divisions. I have used some SDK code for the post-processing, but the result does not look the way I expect. I have run the same frames through the AppOFCuda sample from the SDK and compared the visualization results, and I believe I am getting incorrect vectors.
Does this function look like the correct way to calculate the flow vectors?


void calculateFlow(uint8_t* frame1, uint8_t* frame2, uint8_t* vecframe, CUcontext cuContext, CUstream instream, CUstream outstream) {
    // Create an instance of the API
    API* nvofobj = new API(cuContext, instream, outstream);
    
    // Initialize the input parameters
    NV_OF_INIT_PARAMS initparams = { 0 };

    initparams.width = W_BUFF;
    initparams.height = H_BUFF;
    initparams.inputBufferFormat = NV_OF_BUFFER_FORMAT_ABGR8;
    initparams.mode = NV_OF_MODE_OPTICALFLOW;
    initparams.outGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)gridsize;
    initparams.enableOutputCost = NV_OF_FALSE;
    initparams.predDirection = NV_OF_PRED_DIRECTION_FORWARD;
    initparams.perfLevel = NV_OF_PERF_LEVEL_SLOW;
    initparams.enableExternalHints = NV_OF_FALSE;
    initparams.enableRoi = NV_OF_FALSE;
    initparams.hintGridSize = (NV_OF_HINT_VECTOR_GRID_SIZE)0;

    NVOF_API_CALL(nvofobj->getAPI()->nvOFInit(nvofobj->getHandle(), &initparams));

    // Buffer management
    // Input frame buffer and uploading to GPU
    NV_OF_BUFFER_DESCRIPTOR inbufferDesc;
    inbufferDesc.width = W_BUFF;
    inbufferDesc.height = H_BUFF;
    inbufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;
    inbufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_ABGR8;

    NvOFCudaBuffer* inbuffer = new NvOFCudaBuffer(nvofobj, inbufferDesc, 1);
    inbuffer->UploadData((void*)frame1);

    // Reference frame buffer and uploading to GPU
    NV_OF_BUFFER_DESCRIPTOR refbufferDesc;
    refbufferDesc.width = W_BUFF;
    refbufferDesc.height = H_BUFF;
    refbufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;
    refbufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_ABGR8;

    NvOFCudaBuffer* refbuffer = new NvOFCudaBuffer(nvofobj, refbufferDesc, 1);
    refbuffer->UploadData((void*)frame2);
    
    // Pointer for storing the flow vectors
    std::unique_ptr<NV_OF_FLOW_VECTOR[]> flowdata;
    
    // calculate the dimensions of the output buffer
    uint16_t outwidth = 0, outheight = 0;
    
    outheight = H_BUFF / gridsize;
    outwidth = W_BUFF / gridsize;
    
    flowdata.reset(new NV_OF_FLOW_VECTOR[outwidth * outheight]);

    // Output buffer
    NV_OF_BUFFER_DESCRIPTOR outbufferDesc;
    outbufferDesc.width = outwidth;
    outbufferDesc.height = outheight;
    outbufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;
    outbufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_ABGR8;

    NvOFCudaBuffer* outbuffer = new NvOFCudaBuffer(nvofobj, outbufferDesc, 1);

    NV_OF_EXECUTE_INPUT_PARAMS inparams;
    memset(&inparams, 0, sizeof(NV_OF_EXECUTE_INPUT_PARAMS));
    inparams.inputFrame = inbuffer->getOFBufferHandle();
    inparams.referenceFrame = refbuffer->getOFBufferHandle();
    inparams.externalHints = nullptr;
    inparams.disableTemporalHints = NV_OF_FALSE;
    inparams.hPrivData = nullptr;
    inparams.numRois = 0;
    inparams.padding = 0;
    inparams.padding2 = 0;

    NV_OF_EXECUTE_OUTPUT_PARAMS outparams;
    memset(&outparams, 0, sizeof(NV_OF_EXECUTE_OUTPUT_PARAMS));
    outparams.bwdOutputBuffer = nullptr;
    outparams.bwdOutputCostBuffer = nullptr;
    outparams.globalFlowBuffer = nullptr;
    outparams.hPrivData = nullptr;
    outparams.outputBuffer = outbuffer->getOFBufferHandle();
    outparams.outputCostBuffer = nullptr;

    // Run Optical Flow
    nvofobj->getAPI()->nvOFExecute(nvofobj->getHandle(), &inparams, &outparams);

    // Will contain the flow vectors in direction x and y for each pixel
    outbuffer->DownloadData((void*)flowdata.get());

    postProcessVectors((const NV_OF_FLOW_VECTOR*)flowdata.get(), (uint8_t*)vecframe, outwidth, outheight);

    flowdata.reset();
    
    // Destroy the buffers
    delete inbuffer;
    delete refbuffer;
    delete outbuffer;

    // Destroy NVOF session
    nvofobj->getAPI()->nvOFDestroy(nvofobj->getHandle());
}

My resulting frame