Using nvdisasm to show line numbers and register usage

Hello, I have been running nvdisasm with the following options: nvdisasm -g -plr -lrm=narrow. I have the following questions:

  1. I can only get either -g or -plr -lrm=narrow to take effect at a time, i.e., if they are supplied together, -g is ignored. What am I doing wrong here?
  2. How can I format the output of -plr? Using -plr -lrm=narrow, I get a massive gap between the instructions and the register usage column, e.g. -

If no superior alternative is offered, you could always remove white space from lines by piping the output of nvdisasm through the cut utility program. If you are on Windows, it is available as part of Cygwin.

1 Like

hmmm

I took the simpleMultiCopy.cu sample code, compiled it as:

nvcc simpleMultiCopy.cu -cubin -lineinfo -o s.o

and then did:

nvdisasm -g -plr -lrm=narrow s.o 

and got this:

<snip>
//--------------------- .text._Z9incKernelPiS_ii  --------------------------
        .section        .text._Z9incKernelPiS_ii,"ax",@progbits
        .sectioninfo    @"SHI_REGISTERS=12"
        .align  32
                                                                                     // +------------------+--------+
                                                                                     // |       GPR        | PRED   |
                                                                                     // |                  |        |
                                                                                     // |    000000000011  |        |
                                                                                     // |  # 012345678901  |  # 01  |
                                                                                     // +------------------+--------+
        .global         _Z9incKernelPiS_ii                                           // |                  |        |
        .type           _Z9incKernelPiS_ii,@function                                 // |                  |        |
        .size           _Z9incKernelPiS_ii,(.L_x_7 - _Z9incKernelPiS_ii)             // |                  |        |
        .other          _Z9incKernelPiS_ii,@"STO_CUDA_ENTRY STV_DEFAULT"             // |                  |        |
_Z9incKernelPiS_ii:                                                                  // |                  |        |
.text._Z9incKernelPiS_ii:                                                            // |                  |        |
        /*0008*/                   MOV R1, c[0x0][0x20] ;                            // |  1  ^            |        |
        /*0010*/         {         MOV R6, c[0x0][0x154] ;                           // |  3 ^:    ^       |        |
        /*0018*/                   S2R R0, SR_CTAID.X         }                      // |  3 ^:    ^       |        |
        /*0028*/         {         ISETP.GE.AND P0, PT, R6, 0x1, PT ;                // |  4 ::^   v       |  1 ^   |
        /*0030*/                   S2R R2, SR_TID.X         }                        // |  4 ::^   v       |  1 ^   |
        /*0038*/                   XMAD.MRG R3, R0.reuse, c[0x0] [0x8].H1, RZ ;      // |  5 v::^  :       |  1 :   |
        /*0048*/                   XMAD R2, R0.reuse, c[0x0] [0x8], R2 ;             // |  5 v:x:  :       |  1 :   |
        /*0050*/                   XMAD.PSL.CBCC R0, R0.H1, R3.H1, R2 ;              // |  5 x:vv  :       |  1 :   |
        /*0058*/                   ISETP.GE.OR P0, PT, R0, c[0x0][0x150], !P0 ;      // |  3 v:    :       |  1 x   |
        /*0068*/                   NOP ;                                             // |  3 ::    :       |  1 :   |
        /*0070*/                   NOP ;                                             // |  3 ::    :       |  1 :   |
        /*0078*/               @P0 EXIT ;                                            // |  3 ::    :       |  1 v   |
        /*0088*/                   SHL R4, R0.reuse, 0x2 ;                           // |  4 v:  ^ :       |        |
        /*0090*/                   IADD32I R5, R6, -0x1 ;                            // |  5 ::  :^v       |        |
        /*0098*/                   SHR R0, R0, 0x1e ;                                // |  5 x:  :::       |        |
        /*00a8*/                   IADD R2.CC, R4, c[0x0][0x148] ;                   // |  6 ::^ v::       |        |
        /*00b0*/                   ISETP.GE.U32.AND P0, PT, R5, 0x3, PT ;            // |  6 ::: :v:       |  1 ^   |
        /*00b8*/                   IADD.X R3, R0, c[0x0][0x14c] ;                    // |  6 v::^: :       |  1 :   |
        /*00c8*/                   IADD R4.CC, R4, c[0x0][0x140] ;                   // |  6 ::::x :       |  1 :   |
        /*00d0*/                   IADD.X R5, R0, c[0x0][0x144] ;                    // |  7 v::::^:       |  1 :   |
        /*00d8*/         {         LOP32I.AND R0, R6, 0x3 ;                          // |  7 ^:::::v       |  1 v   |
        /*00e8*/              @!P0 BRA `(.L_x_0)         }                           // |  7 ^:::::v       |  1 v   |
        /*00f0*/                   IADD R6, -R0, c[0x0][0x154] ;                     // |  7 v:::::^       |        |
        /*00f8*/                   ISETP.GT.AND P0, PT, R6, RZ, PT ;                 // |  7 ::::::v       |  1 ^   |
        /*0108*/              @!P0 BRA `(.L_x_1) ;                                   // |  7 :::::::       |  1 v   |
        /*0110*/                   ISETP.GT.AND P1, PT, R6, 0xc, PT ;                // |  7 ::::::v       |  1  ^  |
        /*0118*/         {         PSETP.AND.AND P0, PT, PT, PT, PT ;                // |  7 :::::::       |  2 ^v  |
        /*0128*/              @!P1 BRA `(.L_x_2)         }                           // |  7 :::::::       |  2 ^v  |
        /*0130*/                   PSETP.AND.AND P0, PT, !PT, PT, PT ;               // |  7 :::::::       |  1 ^   |
.L_x_3:                                                                              // |  8 ::::::::      |  1 :   |
        /*0138*/         {         IADD32I R6, R6, -0x10 ;                           // |  8 ::vv::x^      |  1 :   |
        /*0148*/                   LDG.E R7, [R2]         }                          // |  8 ::vv::x^      |  1 :   |
        /*0150*/                   IADD32I R7, R7, 0x1 ;                             // |  8 :::::::x      |  1 :   |
        /*0158*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*0168*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0170*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*0178*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*0188*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0190*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  1 :   |
        /*0198*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  1 :   |
        /*01a8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*01b0*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  1 :   |
        /*01b8*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  1 :   |
        /*01c8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*01d0*/                   IADD32I R7, R8, 0x1 ;                             // |  9 :::::::^v     |  1 :   |
        /*01d8*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*01e8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*01f0*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*01f8*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*0208*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0210*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  1 :   |
        /*0218*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  1 :   |
        /*0228*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0230*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  1 :   |
        /*0238*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  1 :   |
        /*0248*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0250*/                   IADD32I R7, R8, 0x1 ;                             // |  9 :::::::^v     |  1 :   |
        /*0258*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*0268*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0270*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*0278*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*0288*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0290*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  1 :   |
        /*0298*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  1 :   |
        /*02a8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*02b0*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  1 :   |
        /*02b8*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  1 :   |
        /*02c8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*02d0*/                   IADD32I R7, R8, 0x1 ;                             // |  9 :::::::^v     |  1 :   |
        /*02d8*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*02e8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*02f0*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*02f8*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*0308*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0310*/                   ISETP.GT.AND P1, PT, R6, 0xc, PT ;                // |  8 ::::::v :     |  2 :^  |
        /*0318*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  2 ::  |
        /*0328*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  2 ::  |
        /*0330*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  2 ::  |
        /*0338*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  2 ::  |
        /*0348*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  2 ::  |
        /*0350*/               @P1 BRA `(.L_x_3) ;                                   // |  7 :::::::       |  2 :v  |
.L_x_2:                                                                              // |  7 :::::::       |  2 ::  |
        /*0358*/                   ISETP.GT.AND P1, PT, R6, 0x4, PT ;                // |  7 ::::::v       |  2 :^  |
        /*0368*/              @!P1 BRA `(.L_x_4) ;                                   // |  7 :::::::       |  2 :v  |
        /*0370*/         {         PSETP.AND.AND P0, PT, !PT, PT, PT ;               // |  8 ::vv:::^      |  1 ^   |
        /*0378*/                   LDG.E R7, [R2]         }                          // |  8 ::vv:::^      |  1 ^   |
        /*0388*/                   IADD32I R7, R7, 0x1 ;                             // |  8 :::::::x      |  1 :   |
        /*0390*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*0398*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*03a8*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*03b0*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*03b8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*03c8*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  1 :   |
        /*03d0*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  1 :   |
        /*03d8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*03e8*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  1 :   |
        /*03f0*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  1 :   |
        /*03f8*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0408*/                   IADD32I R7, R8, 0x1 ;                             // |  9 :::::::^v     |  1 :   |
        /*0410*/                   STG.E [R4], R7 ;                                  // |  8 ::::vv:v      |  1 :   |
        /*0418*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0428*/                   IADD32I R9, R8, 0x1 ;                             // |  9 ::::::: v^    |  1 :   |
        /*0430*/                   STG.E [R4], R9 ;                                  // |  8 ::::vv:  v    |  1 :   |
        /*0438*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0448*/                   IADD32I R10, R8, 0x1 ;                            // |  9 ::::::: v ^   |  1 :   |
        /*0450*/                   STG.E [R4], R10 ;                                 // |  8 ::::vv:   v   |  1 :   |
        /*0458*/                   LDG.E R8, [R2] ;                                  // |  8 ::vv::: ^     |  1 :   |
        /*0468*/                   IADD32I R6, R6, -0x8 ;                            // |  8 ::::::x :     |  1 :   |
        /*0470*/                   IADD32I R11, R8, 0x1 ;                            // |  9 ::::::: v  ^  |  1 :   |
        /*0478*/                   STG.E [R4], R11 ;                                 // |  8 ::::vv:    v  |  1 :   |
.L_x_4:                                                                              // |  7 :::::::       |  1 :   |
        /*0488*/                   ISETP.NE.OR P0, PT, R6, RZ, P0 ;                  // |  7 ::::::v       |  1 x   |
        /*0490*/              @!P0 BRA `(.L_x_0) ;                                   // |  7 :::::::       |  1 v   |
.L_x_1:                                                                              // |  8 ::::::::      |        |
<snip>

I didn’t seem to have any trouble with the line info, nor did I see any “massive amount of gap” between the instructions and the register usage columns.

CUDA 12.2, linux

I too am on 12.2, on Linux. I don’t see the line numbers in your snippet. For reference, when I use only the -g flag with nvdisasm, I get the line numbers associated with the source code:

        /*44b0*/                   DMUL R2, R2, c[0x2][0x1138] ;
        //## File "CKUBMS_wrapper.cu", line 710
        /*44c0*/                   LEA R4, P0, R0, c[0x0][0x170], 0x3 ;
        /*44d0*/                   LEA.HI.X R5, R0, c[0x0][0x174], RZ, 0x3, P0 ;
        //## File "CKUBMS_wrapper.cu", line 703
        /*44e0*/                   DMUL R2, R2, R12 ;
        /*44f0*/                   STG.E.64 [R4.64], R2 ;
        //## File "CKUBMS_wrapper.cu", line 711
        /*4500*/                   EXIT ;

I agree the line info is missing. If you want to see both, I suggest filing a bug.