CUDA-GDB Printing Wrong Info

I found some confusing information from cuda-gdb.

I have a data structure defined as

typedef struct
{
    // die relative x location of this RTCM frame
    int64_t dieRelativeTopLeftFrameX;
    // die relative y location of this RTCM frame
    int64_t dieRelativeTopLeftFrameY;
    // Query expand X and Y used to check validness of expandToLeft/Top/Right/Bottom in CareAreaArrayInfo
    uint32_t queryExpandX;
    uint32_t queryExpandY;
    // offset from origin of RTCM coordinate system to top/left corner of frame (pixels x16)
    uint32_t jobOffsetX;
    uint32_t jobOffsetY;
    // number of channels (0..RTCM_MAX_CHANNELS) - must be 1 for Bright Field
    uint8_t numChannel;
    // number of elements per channel in the following LUTs
    uint16_t numCaGrpPerChannel[RTCM_MAX_CHANNELS];
    // mapping from group ID to sens ID or vice versa
    RtcmCareAreaGroup** ppCareAreaGroup = nullptr;
    // number of elements per channel in the following LUTs
    uint16_t fieldGroupCount[RTCM_MAX_CHANNELS];
    FieldGroup** fieldGroup;
    // number of RTCM care area bounding boxes
    uint32_t rtcmBoundingBoxCount;
    // RTCM care area bounding boxes
    RTCMBoundingBox* pBoundingBoxes;
    uint32_t frameWidth; // Width of frame
    uint32_t pixelWidth; // Pixel width
    // == 0: pre-flattened (legacy) RTCM - use sensitivity order (priority) for rendering
    // != 0: unflattened (new) RTCM - use Z order (renderPriority) for rendering
    uint8_t unflattened;
    // == 0: original sort order (legacy) - need to upgrade unflattened data to new sort order prior to Z order
    // rendering
    // == 1: Z sort order (new) - one CAG ID per bounding box, ready for Z order rendering of unflattened data
    //  > 1: reserved for future sort order options
    uint8_t sortOrder;
} RTCMFrame;

This data structure was read in a kernel:

__global__ void readRtcmFrame_knl_v2(RTCMFrame* pRtcmFrame,
    uint32_t* pDieRelFrameIDx,
    uint32_t* pDieRelFrameIDy, 
    uint32_t* pBbIndices,
    const uint8_t* pBuffer,
    uint32_t* pBufferIndex,
    const int frameIndex,
    RtcmCareAreaGroup** ppGrpInfo,
    RtcmCareAreaGroup* pGrpInfo,
    RTCMBoundingBox* pBoundingBox,
    const uint8_t formatVersion)
{
    uint32_t frameHeight{0}, pixelHeight{0};
    uint8_t charVal{0}, direction{0}, cagPresent{0}, rfaPresent{0}, rfaAssociationPresent{0};
    uint16_t associatedRFAID{0};
    uint32_t associatedRFASubID{0}, offsetFrameToDiffFrameX{0}, offsetFrameToDiffFrameY{0};

    uint32_t bufferIndex = pBufferIndex[frameIndex]; // read buffer index into register

#if DEBUG
    if (frameIndex == 0)
    {
        printf("Starting buffer index in frame %d: %d\n", frameIndex, bufferIndex);
        printf("Version of RTCM format: %d\n", formatVersion);
    }
#endif

    // die relative x of the top left corner of the frame
    READ_LONG(pBuffer, bufferIndex, (uint64_t*)&pRtcmFrame->dieRelativeTopLeftFrameX);

    // die relative y of the top left corner of the frame
    READ_LONG(pBuffer, bufferIndex, (uint64_t*)&pRtcmFrame->dieRelativeTopLeftFrameY);

    if (formatVersion <= RTCM_FORMAT_20)
    {
        // Query expand X/Y
        READ_INT(pBuffer, bufferIndex, &pRtcmFrame->queryExpandX);
        READ_INT(pBuffer, bufferIndex, &pRtcmFrame->queryExpandY);
    }

#if DEBUG
    printf("query expand x and y in frame %d: %d, %d\n", 
        frameIndex, 
        pRtcmFrame->queryExpandX, 
        pRtcmFrame->queryExpandY);
#endif

    // info (direction of swath, CAG present, RFA present, + 5 reserved bits)
    READ_CHAR(pBuffer, bufferIndex, &charVal);

    direction = RTCM_FRAME_DIRECTION(charVal); // bit 0
    cagPresent = RTCM_FRAME_CARE_AREA_GROUP_PRESENT(charVal); // bit 1
    rfaPresent = RTCM_FRAME_FIELD_GROUP_PRESENT(charVal); // bit 2
    rfaAssociationPresent = RTCM_FRAME_FIELD_ASSOCIATION_PRESENT(charVal); // bit 3
    pRtcmFrame->unflattened = RTCM_FRAME_UNFLATTENED(charVal); // bit 4
    pRtcmFrame->sortOrder = RTCM_FRAME_SORT_ORDER(charVal); // bits 5 and 6

    READ_SHORT_INT(pBuffer, bufferIndex, &frameHeight); // read RTCM frame height
    READ_SHORT_INT(pBuffer, bufferIndex, &pRtcmFrame->frameWidth); // read RTCM frame width

    // Job frame Offset X/Y
    if (formatVersion > RTCM_FORMAT_20) // only applies when format < 20
    {
        // Since RTCMv20, jobOffsetX will be removed, as this value is not used in Algo
        READ_SHORT_INT(pBuffer, bufferIndex, &pRtcmFrame->jobOffsetX);
    }

    READ_SHORT_INT(pBuffer, bufferIndex, &pRtcmFrame->jobOffsetY);

    // pixel height (skipped)/width
    READ_INT(pBuffer, bufferIndex, &pixelHeight);
    READ_INT(pBuffer, bufferIndex, &pRtcmFrame->pixelWidth);

    // die relative frame id X/Y
    READ_SHORT_INT(pBuffer, bufferIndex, &(pDieRelFrameIDx[frameIndex]));
    READ_SHORT_INT(pBuffer, bufferIndex, &(pDieRelFrameIDy[frameIndex]));

    // Read if RFA Association present
    // Just read the params for now. Current RTCM data do not have these elements.
    if (rfaAssociationPresent > 0)
    {
        READ_SHORT(pBuffer, bufferIndex, &associatedRFAID);
        READ_SHORT_INT(pBuffer, bufferIndex, &associatedRFASubID);
        READ_INT(pBuffer, bufferIndex, &offsetFrameToDiffFrameX);
        READ_INT(pBuffer, bufferIndex, &offsetFrameToDiffFrameY);
    }

    // reserved (9 bytes)
    SkipBytes(bufferIndex, RTCM_RESERVED_LEN_1);

    // RTCM v15+ dropped support for channels, and the channel count should be equal to 1
    READ_CHAR(pBuffer, bufferIndex, &pRtcmFrame->numChannel);

    if (pRtcmFrame->numChannel != 1)
    {
        printf("numChannel is not equal to 1, it is equal to %d\n", pRtcmFrame->numChannel);
        return;
    }

    // read a table to convert from group id to sens id
    if (cagPresent > 0)
    {
        setRtcmCaGrpPtr(pRtcmFrame, pBuffer, bufferIndex, ppGrpInfo, pGrpInfo);
        pBufferIndex[frameIndex] = bufferIndex;
    }

    if (rfaPresent > 0) readFieldGroup(pRtcmFrame, pBuffer, bufferIndex);

    SkipBytes(bufferIndex, pRtcmFrame->numCaGrpPerChannel[0] * MAX_NUM_CA_GROUP_PER_FRAME);
    
    // read number of bounding boxes
    READ_SHORT_INT(pBuffer, bufferIndex, &pRtcmFrame->rtcmBoundingBoxCount);

    // allocate rtcmFrame->rtcmCA to accommodate sufficient pointers to bounding boxes
    pRtcmFrame->pBoundingBoxes = pBoundingBox;

    if (pRtcmFrame->pBoundingBoxes == NULL)
    {
        printf("pRtcmFrame->pBoundingBoxes is NULL\n");
        return;
    }

    // skip over sizes (in bytes) of each bounding box
    uint32_t offset = bufferIndex + pRtcmFrame->rtcmBoundingBoxCount * sizeof(uint32_t);

    for (uint16_t i = 0; i < pRtcmFrame->rtcmBoundingBoxCount; i++)
    {
        uint32_t intVal{0};
        READ_INT(pBuffer, bufferIndex, &intVal); // read number of bytes for current BB
        pBbIndices[i] = offset;
        offset += intVal;
    }
}

My breakpoint was set at READ_SHORT_INT(pBuffer, bufferIndex, &pRtcmFrame->jobOffsetY);. After executing this statement, I typed p pRtcmFrame->jobOffsetY, and got the following:
$30 = 0.

On the other hand, I typed p *pRtcmFrame, and got the following info:

$29 = {dieRelativeTopLeftFrameX = 74880000, dieRelativeTopLeftFrameY = 225469779, queryExpandX = 0, queryExpandY = 0, jobOffsetX = 0, jobOffsetY = 693, numChannel = 0 '\000', numCaGrpPerChannel = {0 <repeats 15 times>}, ppCareAreaGroup = 0x0, 
  fieldGroupCount = {0 <repeats 15 times>}, fieldGroup = 0x0, rtcmBoundingBoxCount = 0, pBoundingBoxes = 0x0, frameWidth = 8192, pixelWidth = 0, unflattened = 1 '\001', sortOrder = 1 '\001'}

Clearly, p pRtcmFrame->jobOffsetY showed the wrong information (as I verified using CPU code. The value should be 693). Is this a known bug with the following cuda-gdb version:

NVIDIA (R) CUDA Debugger
11.7 release
Portions Copyright (C) 2007-2022 NVIDIA Corporation
GNU gdb (GDB) 10.2
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.

Hi, thanks for reaching out!

This issue is known and was fixed in the release of CUDA Toolkit 12.1. It was previously reported in the following forum post:

1 Like

Could you provide a way to install cuda-gdb 12.1 or 12.2 only without the whole CUDA toolkit? Want to install that for a quick test.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.