Cuobjdump register usage information

Hello! I’m wondering whether the register usage info in the ELF file generated by cuobjdump is inclusive or exclusive. In other words, if func1 calls func2, does the register usage information for func1 include the registers used by func2? Thank you!

For a fully linked object, I believe it should be inclusive. Example:

$ cat a.cu
// Device function that returns normcdf(a) for its single float argument.
// It is declared and called from the kernel k() in b.cu.
__device__ float f(float a) {return normcdf(a);}
$ cat b.cu
__device__ float f(float a);

// Kernel: each thread reads i[threadIdx.x], passes the value to the device
// function f() (declared above, defined in a.cu), and stores the result in
// o[threadIdx.x]. Indexing uses threadIdx.x only and performs no bounds check.
__global__ void k(float *i, float *o){

  o[threadIdx.x] = f(i[threadIdx.x]);
}


// Host entry point: launches k with 1 block of 32 threads and then waits for
// the device to finish.
// NOTE(review): both pointers are NULL and no device memory is allocated, so
// the kernel would dereference null pointers if this binary were executed.
// Presumably the program exists only to be compiled and inspected, not run.
int main(){

  float *i = NULL;
  float *o = NULL;
  k<<<1,32>>>(i, o);
  // Block until the kernel launch above has completed.
  cudaDeviceSynchronize();
}
$ nvcc -rdc=true -Xptxas=-v a.cu b.cu -o test
ptxas info    : 0 bytes gmem
ptxas info    : Function properties for _Z1ff
    0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info    : 0 bytes gmem
ptxas info    : Compiling entry function '_Z1kPfS_' for 'sm_52'
ptxas info    : Function properties for _Z1kPfS_
    0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info    : Used 17 registers, 336 bytes cmem[0]
$ cuobjdump -res-usage test

Fatbin elf code:
================
arch = sm_52
code version = [1,7]
producer = <unknown>
host = linux
compile_size = 64bit

Resource usage:
 Common:
  GLOBAL:0
 Function _Z1kPfS_:
  REG:17 STACK:0 SHARED:0 LOCAL:0 CONSTANT[0]:336 CONSTANT[2]:60 TEXTURE:0 SURFACE:0 SAMPLER:0
 Function _Z1ff:
  REG:14 STACK:0 SHARED:0 LOCAL:0 TEXTURE:0 SURFACE:0 SAMPLER:0
$ cuobjdump -sass ./test

Fatbin elf code:
================
arch = sm_52
code version = [1,7]
producer = <unknown>
host = linux
compile_size = 64bit

        code for sm_52
                Function : _Z1kPfS_
        .headerflags    @"EF_CUDA_SM52 EF_CUDA_PTX_SM(EF_CUDA_SM52)"
                                                                     /* 0x083fc400e3e007f6 */
        /*0008*/                   MOV R1, c[0x0][0x20] ;            /* 0x4c98078000870001 */
        /*0010*/                   S2R R2, SR_TID.X ;                /* 0xf0c8000002170002 */
        /*0018*/                   SHL R16, R2.reuse, 0x2 ;          /* 0x3848000000270210 */
                                                                     /* 0x001fc800fec007f5 */
        /*0028*/                   SHR.U32 R2, R2, 0x1e ;            /* 0x3828000001e70202 */
        /*0030*/                   IADD R4.CC, R16, c[0x0][0x140] ;  /* 0x4c10800005071004 */
        /*0038*/                   IADD.X R5, R2, c[0x0][0x144] ;    /* 0x4c10080005170205 */
                                                                     /* 0x001fd821ffa000b4 */
        /*0048*/                   LDG.E R4, [R4] ;                  /* 0xeed4200000070404 */
        /*0050*/                   JCAL 0x0 ;                        /* 0xe220000000000040 */
        /*0058*/                   IADD R6.CC, R16, c[0x0][0x148] ;  /* 0x4c10800005271006 */
                                                                     /* 0x001ffc00fea007f2 */
        /*0068*/                   IADD.X R7, R2, c[0x0][0x14c] ;    /* 0x4c10080005370207 */
        /*0070*/                   STG.E [R6], R4 ;                  /* 0xeedc200000070604 */
        /*0078*/                   EXIT ;                            /* 0xe30000000007000f */
                                                                     /* 0x001f8000fc0007ff */
        /*0088*/                   BRA 0x80 ;                        /* 0xe2400fffff07000f */
        /*0090*/                   NOP;                              /* 0x50b0000000070f00 */
        /*0098*/                   NOP;                              /* 0x50b0000000070f00 */
                                                                     /* 0x001f8000fc0007e0 */
        /*00a8*/                   NOP;                              /* 0x50b0000000070f00 */
        /*00b0*/                   NOP;                              /* 0x50b0000000070f00 */
        /*00b8*/                   NOP;                              /* 0x50b0000000070f00 */
                ..........


                Function : _Z1ff
        .headerflags    @"EF_CUDA_SM52 EF_CUDA_PTX_SM(EF_CUDA_SM52)"
                                                                                    /* 0x001fec00fe2207f1 */
        /*0008*/                   FSETP.GT.AND P0, PT, |R4|.reuse, 14.5, PT ;      /* 0x36b403c168070487 */
        /*0010*/                   LOP32I.AND R0, R4, 0x80000000 ;                  /* 0x0408000000070400 */
        /*0018*/                   MOV32I R10, 0x3be6e05b ;                         /* 0x0103be6e05b7f00a */
                                                                                    /* 0x001f9440fe2007f6 */
        /*0028*/               @P0 LOP32I.OR R4, R0, 0x41680000 ;                   /* 0x0424168000000004 */
        /*0030*/                   FMUL32I R3, R4.reuse, -0.70710676908493041992 ;  /* 0x1e0bf3504f370403 */
        /*0038*/                   FSETP.GEU.AND P1, PT, R4, -1, PT ;               /* 0x37be03bf8007040f */
                                                                                    /* 0x001fd800fec207f6 */
        /*0048*/                   FFMA R5, R4.reuse, c[0x2][0x0], -R3 ;            /* 0x4982018800070405 */
        /*0050*/                   FFMA R5, R4, c[0x2][0x4], R5 ;                   /* 0x4980028800170405 */
        /*0058*/                   FADD R0, R3, R5 ;                                /* 0x5c58000000570300 */
                                                                                    /* 0x001c4440fe0207f2 */
        /*0068*/                   FADD R7, |R0|.reuse, 4 ;                         /* 0x3858404080070007 */
        /*0070*/         {         FADD R6, |R0|.reuse, -4 ;                        /* 0x3958404080070006 */
        /*0078*/                   MUFU.RCP R7, R7         }
                                                                                    /* 0x5080000000470707 */
                                                                                    /* 0x001fd840fe2007e1 */
        /*0088*/                   FMUL R12, R0, R0 ;                               /* 0x5c6800000007000c */
        /*0090*/                   FSETP.LT.AND P2, PT, R0.reuse, RZ, PT ;          /* 0x5bb103800ff70017 */
        /*0098*/              @!P1 FADD R3, R3, -R0 ;                               /* 0x5c58200000090303 */
                                                                                    /* 0x001fd401fe2007f1 */
        /*00a8*/              @!P1 FADD R3, R5, R3 ;                                /* 0x5c58000000390503 */
        /*00b0*/                   FMUL R8, R6, R7 ;                                /* 0x5c68000000770608 */
        /*00b8*/                   FADD R6, |R0|, -RZ ;                             /* 0x5c5860000ff70006 */
                                                                                    /* 0x001fd800fcc007fd */
        /*00c8*/                   FADD R9, R8, 1 ;                                 /* 0x3858003f80070809 */
        /*00d0*/                   FFMA R9, R9, -4, R6 ;                            /* 0x3380034080070909 */
        /*00d8*/                   FFMA R9, R8, -R6, R9 ;                           /* 0x5981048000670809 */
                                                                                    /* 0x081fc400fea007f1 */
        /*00e8*/                   FFMA R9, R7, R9, R8 ;                            /* 0x5980040000970709 */
        /*00f0*/                   MOV32I R7, 0x3f800000 ;                          /* 0x0103f8000007f007 */
        /*00f8*/                   FFMA R8, R9.reuse, c[0x2][0x8], R10 ;            /* 0x4980050800270908 */
                                                                                    /* 0x001fcc00fe0007f2 */
        /*0108*/                   FFMA R7, R6, 2, R7 ;                             /* 0x328003c000070607 */
        /*0110*/         {         FMUL32I R10, R12, -1.4426950216293334961 ;       /* 0x1e0bfb8aa3b70c0a */
        /*0118*/                   MUFU.RCP R7, R7         }
                                                                                    /* 0x5080000000470707 */
                                                                                    /* 0x001f9800e2c207f0 */
        /*0128*/         {         FFMA R8, R9.reuse, R8, c[0x2][0xc] ;             /* 0x5180040800370908 */
        /*0130*/                   F2F.F32.F32.TRUNC R13, R10         }
                                                                                    /* 0x5ca8058000a70a0d */
        /*0138*/                   FFMA R8, R9, R8, c[0x2][0x10] ;                  /* 0x5180040800470908 */
                                                                                    /* 0x081fd801fea207f1 */
        /*0148*/                   FFMA R8, R9.reuse, R8, c[0x2][0x14] ;            /* 0x5180040800570908 */
        /*0150*/                   FSETP.GT.AND P0, PT, |R13|, 126, PT ;            /* 0x36b403c2fc070d87 */
        /*0158*/                   FFMA R8, R9.reuse, R8, c[0x2][0x18] ;            /* 0x5180040800670908 */
                                                                                    /* 0x081fd800ffa207f6 */
        /*0168*/                   FFMA R8, R9.reuse, R8, c[0x2][0x1c] ;            /* 0x5180040800770908 */
        /*0170*/                   FFMA R8, R9, R8, c[0x2][0x20] ;                  /* 0x5180040800870908 */
        /*0178*/                   FFMA R8, R9.reuse, R8, c[0x2][0x24] ;            /* 0x5180040800970908 */
                                                                                    /* 0x001fd800fec007e6 */
        /*0188*/                   FFMA R8, R9, R8, c[0x2][0x28] ;                  /* 0x5180040800a70908 */
        /*0190*/                   FFMA R8, R9, R8, c[0x2][0x2c] ;                  /* 0x5180040800b70908 */
        /*0198*/                   FMUL R9, R8, R7 ;                                /* 0x5c68000000770809 */
                                                                                    /* 0x001fc400fec207f6 */
        /*01a8*/                   FMUL R11, R9.reuse, -2 ;                         /* 0x396800400007090b */
        /*01b0*/                   FFMA R11, R11, R6, R8 ;                          /* 0x5980040000670b0b */
        /*01b8*/                   FADD R8, -R9, R11 ;                              /* 0x5c59000000b70908 */
                                                                                    /* 0x001fc400fe2007e5 */
        /*01c8*/                   FADD R11, -|R0|, -RZ ;                           /* 0x5c5960000ff7000b */
        /*01d0*/                   FFMA R8, R7, R8, R9 ;                            /* 0x5980048000870708 */
        /*01d8*/                   LOP32I.AND R7, R13, 0x80000000 ;                 /* 0x0408000000070d07 */
                                                                                    /* 0x001fd400fe2807f5 */
        /*01e8*/                   FFMA R6, R6, R11, R12.reuse ;                    /* 0x5980060000b70606 */
        /*01f0*/               @P0 LOP32I.OR R13, R7, 0x42fc0000 ;                  /* 0x04242fc00000070d */
        /*01f8*/                   FSETP.GT.AND P0, PT, |R0|, c[0x2][0x38], PT ;    /* 0x4bb4038800e70087 */
                                                                                    /* 0x001fd440fe2007e6 */
        /*0208*/                   FFMA R7, R13, c[0x2][0x30], -R12 ;               /* 0x4982060800c70d07 */
        /*0210*/                   FFMA R7, R13.reuse, c[0x2][0x34], R7 ;           /* 0x4980038800d70d07 */
        /*0218*/                   FADD32I R13, R13, 12583039 ;                     /* 0x0804b40007f70d0d */
                                                                                    /* 0x001fc800fea007f1 */
        /*0228*/                   FMUL32I R7, R7, 1.4426950216293334961 ;          /* 0x1e03fb8aa3b70707 */
        /*0230*/                   SHL R13, R13, 0x17 ;                             /* 0x3848000001770d0d */
        /*0238*/                   RRO.EX2 R9, R7 ;                                 /* 0x5c90008000770009 */
                                                                                    /* 0x001fd801fcc0071d */
        /*0248*/                   MUFU.EX2 R7, R9 ;                                /* 0x5080000000270907 */
        /*0250*/                   FMUL R7, R13, R7 ;                               /* 0x5c68000000770d07 */
        /*0258*/                   FFMA R6, R7, R6, R7 ;                            /* 0x5980038000670706 */
                                                                                    /* 0x001fd400fe2007f6 */
        /*0268*/                   FMUL R6, R8, R6 ;                                /* 0x5c68000000670806 */
        /*0270*/                   SEL R4, R6, RZ, !P0 ;                            /* 0x5ca004000ff70604 */
        /*0278*/              @!P1 FMUL R6, R0, -2 ;                                /* 0x3968004000090006 */
                                                                                    /* 0x001fd800fcc007f6 */
        /*0288*/               @P2 FADD R4, -R4, 2 ;                                /* 0x3859004000020404 */
        /*0290*/              @!P1 FMUL R6, R4, R6 ;                                /* 0x5c68000000690406 */
        /*0298*/              @!P1 FFMA R4, R3, R6, R4 ;                            /* 0x5980020000690304 */
                                                                                    /* 0x001ffc00ffe007f0 */
        /*02a8*/         {         FMUL R4, R4, 0.5 ;                               /* 0x3868003f00070404 */
        /*02b0*/                   RET         }
                                                                                    /* 0xe32000000007000f */
        /*02b8*/                   BRA 0x2b8 ;                                      /* 0xe2400fffff87000f */
                ..........


$

Thanks a lot! I believe if I want to get the exclusive number, I’ll need to do separate compilation.

Keep in mind that separate compilation may increase register usage due to ABI requirements. I would expect any differences to be small, but since no information was given as to how this data would be used, I thought I should point out this caveat.

I’ll keep this in mind. Thanks for bringing it up!