Which toolkit version are you using? I’ve just run the kernel through 3.2 and got 7 registers:
[font=“Courier New”]Compiling entry function ‘_Z12Karatsuba1x1Pj’ for ‘sm_10’
ptxas info : Used 7 registers, 4+16 bytes smem, 4 bytes cmem[1]
[/font]
You can use [font=“Courier New”]decuda[/font]
// Disassembling _Z12Karatsuba1x1Pj
000000: 41002c05 00000007 mul24.lo.u32.u16.u16 $r1, s[0x000c], 0x0040
000008: a0000001 04000780 cvt.rn.u32.u16 $r0, $r0.lo
000010: 20000001 04004780 add.u32 $r0, $r0, $r1
000018: 30030001 c4100780 shl.u32 $r0, $r0, 0x00000003
000020: 2000c80d 04200780 add.u32 $r3, s[0x0010], $r0
000028: 20048609 00000003 add.b32 $r2, $r3, 0x00000004
000030: d00e0601 80c00780 mov.u32 $r0, g[$r3]
000038: d00e0405 80c00780 mov.u32 $r1, g[$r2]
000040: 40030005 00000780 mul24.lo.u32.u16.u16 $r1, $r0.lo, $r1.hi
000048: 60020211 000147c0 mad24.lo.u32.u16.u16.u32 $p0|$r4, $r0.hi, $r1.lo, $r5
000050: 30100809 c4100780 shl.u32 $r2, $r4, 0x00000010
000058: 30100811 e4100780 shr.u32 $r4, $r4, 0x00000010
000060: 6002010d 000187d8 mad24.lo.u32.u16.u16.u32 $p1|$o67, $r0.lo, $r1.lo, $r6
000068: 60020215 00014780 mad24.lo.u32.u16.u16.u32 $r5, $r0.hi, $r1.lo, $r5
000070: 21000801 04400880 @$p0.cf add.u32 $r0, $r4, c1[0x0000]
000078: 30100a15 c4100780 shl.u32 $r5, $r5, 0x00000010
000080: 60030201 0c011780 mad24c1.lo.u32.u16.u16.u32 $r0, -$r0.hi, $r1.hi, -$r4
000088: 60020001 00014780 mad24.lo.u32.u16.u16.u32 $r0, $r0.lo, $r1.lo, $r5
000090: d00e0601 a0c00780 mov.u32 g[$r3], $r0
000098: d00e0401 a0c00781 mov.end.u32 g[$r2], $r0
// segment: const (0:0000)
0000: 00010000
or [font=“Courier New”]cuobjdump[/font] to find out yourself what the registers are used for:
code for sm_10
Function : _Z12Karatsuba1x1Pj
/*0000*/ /*0x41002c0500000007*/ IMUL32I.U16.U16 R1, g [0x6].U16, 0x40;
/*0008*/ /*0xa000000104000780*/ I2I.U32.U16 R0, R0L;
/*0010*/ /*0x2000000104004780*/ IADD R0, R0, R1;
/*0018*/ /*0x30030001c4100780*/ SHL R0, R0, 0x3;
/*0020*/ /*0x2000c80d04200780*/ IADD R3, g [0x4], R0;
/*0028*/ /*0x2004860900000003*/ IADD32I R2, R3, 0x4;
/*0030*/ /*0xd00e060180c00780*/ GLD.U32 R0, global14 [R3];
/*0038*/ /*0xd00e040580c00780*/ GLD.U32 R1, global14 [R2];
/*0040*/ /*0x4003001500000780*/ IMUL.U16.U16 R5, R0L, R1H;
/*0048*/ /*0x60020211000147c0*/ IMAD.U16.C0 R4, R0H, R1L, R5;
/*0050*/ /*0x30100819c4100780*/ SHL R6, R4, 0x10;
/*0058*/ /*0x30100811e4100780*/ SHR R4, R4, 0x10;
/*0060*/ /*0x600201fd000187d8*/ IMAD.U16.C1 o [0x7f], R0L, R1L, R6;
/*0068*/ /*0x6002021500014780*/ IMAD.U16 R5, R0H, R1L, R5;
/*0070*/ /*0x2100081104400880*/ IADD R4 (C0.CARRY), R4, c [0x1] [0x0];
/*0078*/ /*0x30100a15c4100780*/ SHL R5, R5, 0x10;
/*0080*/ /*0x600302110c011780*/ IMAD.U16 R4 (C3), R0H, R1H, -R4;
/*0088*/ /*0x6002000100014780*/ IMAD.U16 R0, R0L, R1L, R5;
/*0090*/ /*0xd00e0611a0c00780*/ GST.U32 global14 [R3], R4;
/*0098*/ /*0xd00e0401a0c00781*/ GST.U32 global14 [R2], R0;