code for sm_86
        Function : __inference_reduction_1d_parallel_2d_contraction_25_Sum_kernel
        .headerflags @"EF_CUDA_SM86 EF_CUDA_PTX_SM(EF_CUDA_SM86)"
        /* ------------------------------------------------------------------------- */
        /* Disassembled SASS for one kernel. Summary of what the instructions do:    */
        /*                                                                           */
        /* Stage 1 (0x0060-0x01e0): every thread with SR_TID.X <= 255 loads three    */
        /*   consecutive 32-bit values from the array based at c[0x0][0x168],        */
        /*   starting at element (SR_CTAID.X * 256 + SR_TID.X) * 3, adds them with   */
        /*   FADD and stores the partial sum to shared memory at byte offset         */
        /*   SR_TID.X * 4.                                                           */
        /* Stage 2 (0x0200-0x03a0): after a BAR.SYNC, the thread with SR_TID.X == 0  */
        /*   loads the 32-bit value at c[0x0][0x198] + SR_CTAID.X * 4, adds the 256  */
        /*   shared-memory partial sums (bytes 0x000-0x3ff, ascending order) to it   */
        /*   one by one, and stores the result back to the same address.             */
        /*                                                                           */
        /* Register roles:                                                           */
        /*   R0       thread index (SR_TID.X); bumped by 0x100 at 0x0360             */
        /*   R11      block index (SR_CTAID.X) until overwritten by LDS.128 at 0x280 */
        /*   R9       stage 1: element index of the first of the three inputs        */
        /*   R13      stage 1: shared-memory byte offset; stage 2: running sum       */
        /*   R2:R3    stage 2: 64-bit address of this block's output element         */
        /*   R12      stage 2: shared-memory byte offset of the current 32-byte chunk*/
        /*                                                                           */
        /* NOTE(review): c[0x0][0x168] / c[0x0][0x198] look like the input / output  */
        /*   pointers from the kernel parameter area - confirm against the kernel    */
        /*   signature.                                                              */
        /* NOTE(review): the running sum is seeded from the existing output value    */
        /*   (LDG at 0x0250), so the output presumably must be initialised before    */
        /*   launch - confirm with the launching code.                               */
        /* NOTE(review): stage 2 always reads 256 shared slots, but only threads     */
        /*   0..255 that actually exist write one; this appears to assume at least   */
        /*   256 threads per block - confirm the launch configuration.               */
        /* ------------------------------------------------------------------------- */

        /* R1 = c[0x0][0x28]. R1 is never read again in this listing; presumably the */
        /* conventional stack-pointer setup - confirm.                               */
        /*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */ /* 0x000fc40000000f00 */
        /* R0 = thread index within the block. */
        /*0010*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */ /* 0x000e220000002100 */
        /* UR4:UR5 = 64-bit constant at c[0x0][0x118]. UR4 is not named in any later */
        /* operand text; NOTE(review): presumably the descriptor implicitly used by  */
        /* the .64 LDG/STG accesses - confirm.                                       */
        /*0020*/ ULDC.64 UR4, c[0x0][0x118] ; /* 0x0000460000047ab9 */ /* 0x000fe20000000a00 */
        /* Open convergence region B0; the threads that branch at 0x0070 rejoin the  */
        /* others at the BSYNC B0 at 0x01e0.                                         */
        /*0030*/ BSSY B0, 0x1f0 ; /* 0x000001b000007945 */ /* 0x000fe40003800000 */
        /* R11 = block index. */
        /*0040*/ S2R R11, SR_CTAID.X ; /* 0x00000000000b7919 */ /* 0x000e680000002500 */
        /* Block-wide barrier 0, issued before any shared-memory access in this code. */
        /*0050*/ BAR.SYNC 0x0 ; /* 0x0000000000007b1d */ /* 0x000fec0000000000 */

        /* ---- Stage 1: per-thread sum of three inputs -> shared memory ---- */
        /* Threads with R0 > 255 (signed compare) skip the whole stage. */
        /*0060*/ ISETP.GT.AND P0, PT, R0, 0xff, PT ; /* 0x000000ff0000780c */ /* 0x001fda0003f04270 */
        /*0070*/ @P0 BRA 0x1e0 ; /* 0x0000016000000947 */ /* 0x000fea0003800000 */
        /* R9 = (R11 << 8) + R0 = block index * 256 + thread index. */
        /*0080*/ LEA R9, R11, R0, 0x8 ; /* 0x000000000b097211 */ /* 0x002fe400078e40ff */
        /* R8 = R0 - 256: loop counter, biased so the +0x100 at 0x0150 yields R0. */
        /*0090*/ IADD3 R8, R0.reuse, -0x100, RZ ; /* 0xffffff0000087810 */ /* 0x040fe40007ffe0ff */
        /* R13 = R0 * 4: byte offset of this thread's 32-bit slot in shared memory. */
        /*00a0*/ SHF.L.U32 R13, R0, 0x2, RZ ; /* 0x00000002000d7819 */ /* 0x000fe200000006ff */
        /* R9 *= 3: element index of the first of this thread's three inputs. */
        /*00b0*/ IMAD R9, R9, 0x3, RZ ; /* 0x0000000309097824 */ /* 0x000fc600078e02ff */

        /* Loop head (branch target of 0x01d0). Each pass advances R8 by 0x100, R9   */
        /* by 0x300 elements and R13 by 0x400 bytes. For R0 in 0..255 the exit test  */
        /* at 0x0170 fails on the first pass, so the body runs exactly once.         */
        /* R3 = 4: element size in bytes, used as the IMAD.WIDE multiplier. */
        /*00c0*/ MOV R3, 0x4 ; /* 0x0000000400037802 */ /* 0x000fe40000000f00 */
        /* R4 = R9 + 1 and R6 = R9 + 2: indices of the second and third inputs. */
        /*00d0*/ IADD3 R4, R9.reuse, 0x1, RZ ; /* 0x0000000109047810 */ /* 0x040fe40007ffe0ff */
        /*00e0*/ IADD3 R6, R9, 0x2, RZ ; /* 0x0000000209067810 */ /* 0x000fc60007ffe0ff */
        /* R4:R5 = c[0x0][0x168] + (R9 + 1) * 4: 64-bit address of the second input. */
        /*00f0*/ IMAD.WIDE R4, R4, R3, c[0x0][0x168] ; /* 0x00005a0004047625 */ /* 0x000fc800078e0203 */
        /* R6:R7 = c[0x0][0x168] + (R9 + 2) * 4: 64-bit address of the third input. */
        /*0100*/ IMAD.WIDE R6, R6, R3.reuse, c[0x0][0x168] ; /* 0x00005a0006067625 */ /* 0x080fe400078e0203 */
        /* R5 = second input (the load overwrites the high half of its own address). */
        /*0110*/ LDG.E R5, [R4.64] ; /* 0x0000000404057981 */ /* 0x000ea4000c1e1900 */
        /* R2:R3 = c[0x0][0x168] + R9 * 4: address of the first input. This also     */
        /* overwrites the multiplier in R3, which 0x00c0 reloads on any further pass.*/
        /*0120*/ IMAD.WIDE R2, R9, R3, c[0x0][0x168] ; /* 0x00005a0009027625 */ /* 0x000fe400078e0203 */
        /* R7 = third input, R2 = first input. */
        /*0130*/ LDG.E R7, [R6.64] ; /* 0x0000000406077981 */ /* 0x000ee8000c1e1900 */
        /*0140*/ LDG.E R2, [R2.64] ; /* 0x0000000402027981 */ /* 0x000f22000c1e1900 */
        /* Advance the loop counter (R8 += 256) and the element index (R9 += 768). */
        /*0150*/ IADD3 R8, R8, 0x100, RZ ; /* 0x0000010008087810 */ /* 0x000fc40007ffe0ff */
        /*0160*/ IADD3 R9, R9, 0x300, RZ ; /* 0x0000030009097810 */ /* 0x000fe40007ffe0ff */
        /* P0 = R8 > 0x7ffffeff (unsigned): the repeat condition for the loop. */
        /*0170*/ ISETP.GT.U32.AND P0, PT, R8, 0x7ffffeff, PT ; /* 0x7ffffeff0800780c */ /* 0x000fe20003f04070 */
        /* R10 = ((RZ + first) + second) + third; RZ reads as zero. */
        /*0180*/ FADD R10, RZ, R2 ; /* 0x00000002ff0a7221 */ /* 0x010fc80000000000 */
        /*0190*/ FADD R10, R10, R5 ; /* 0x000000050a0a7221 */ /* 0x004fc80000000000 */
        /*01a0*/ FADD R10, R10, R7 ; /* 0x000000070a0a7221 */ /* 0x008fca0000000000 */
        /* Publish the partial sum to shared memory at byte offset R13. */
        /*01b0*/ STS [R13], R10 ; /* 0x0000000a0d007388 */ /* 0x0001e40000000800 */
        /* R13 += 0x400: shared-memory offset for a further pass, if any. */
        /*01c0*/ IADD3 R13, R13, 0x400, RZ ; /* 0x000004000d0d7810 */ /* 0x001fe20007ffe0ff */
        /*01d0*/ @P0 BRA 0xc0 ; /* 0xfffffee000000947 */ /* 0x000fea000383ffff */
        /* Reconverge with the threads that skipped stage 1. */
        /*01e0*/ BSYNC B0 ; /* 0x0000000000007941 */ /* 0x002fea0003800000 */
        /* Block-wide barrier between the shared-memory stores above and the         */
        /* shared-memory loads below.                                                */
        /*01f0*/ BAR.SYNC 0x0 ; /* 0x0000000000007b1d */ /* 0x000fec0000000000 */

        /* ---- Stage 2: thread 0 folds the partial sums into the output ---- */
        /* Threads with R0 > 0 (signed compare) skip to the BSYNC B0 at 0x03a0. */
        /*0200*/ ISETP.GT.AND P0, PT, R0, RZ, PT ; /* 0x000000ff0000720c */ /* 0x000fe20003f04270 */
        /*0210*/ BSSY B0, 0x3b0 ; /* 0x0000019000007945 */ /* 0x000fd80003800000 */
        /*0220*/ @P0 BRA 0x3a0 ; /* 0x0000017000000947 */ /* 0x000fea0003800000 */
        /* R2:R3 = c[0x0][0x198] + R11 * 4: address of this block's output element. */
        /*0230*/ MOV R2, 0x4 ; /* 0x0000000400027802 */ /* 0x000fca0000000f00 */
        /*0240*/ IMAD.WIDE R2, R11, R2, c[0x0][0x198] ; /* 0x000066000b027625 */ /* 0x000fca00078e0202 */
        /* R13 = current output value; it seeds the running sum. */
        /*0250*/ LDG.E R13, [R2.64] ; /* 0x00000004020d7981 */ /* 0x000162000c1e1900 */
        /*0260*/ BSSY B1, 0x390 ; /* 0x0000012000017945 */ /* 0x000fe40003800000 */
        /* Outer loop head (branch target of 0x0370): restart at shared offset 0. */
        /*0270*/ MOV R12, RZ ; /* 0x000000ff000c7202 */ /* 0x000fca0000000f00 */
        /* Inner loop head (branch target of 0x0340): handles 32 bytes (eight 32-bit */
        /* values) per pass. R8..R11 = shared[R12 .. R12+0xf]; this overwrites the   */
        /* block index in R11, which is no longer needed once R2:R3 is computed.     */
        /*0280*/ LDS.128 R8, [R12] ; /* 0x000000000c087984 */ /* 0x000e680000000c00 */
        /* R4..R7 = shared[R12+0x10 .. R12+0x1f]. */
        /*0290*/ LDS.128 R4, [R12+0x10] ; /* 0x000010000c047984 */ /* 0x0004e40000000c00 */
        /* Advance to the next chunk; P0 stays true until R12 reaches 0x400. */
        /*02a0*/ IADD3 R12, R12, 0x20, RZ ; /* 0x000000200c0c7810 */ /* 0x004fc80007ffe0ff */
        /*02b0*/ ISETP.NE.AND P0, PT, R12, 0x400, PT ; /* 0x000004000c00780c */ /* 0x000fe20003f05270 */
        /* Serial FADD chain: running sum R13, then R8, R9, R10, R11, R4, R5, R6, R7 */
        /* in that order; the new running sum ends up back in R13.                   */
        /*02c0*/ FADD R8, R8, R13 ; /* 0x0000000d08087221 */ /* 0x022fc80000000000 */
        /*02d0*/ FADD R9, R9, R8 ; /* 0x0000000809097221 */ /* 0x000fc80000000000 */
        /*02e0*/ FADD R10, R10, R9 ; /* 0x000000090a0a7221 */ /* 0x000fc80000000000 */
        /*02f0*/ FADD R11, R11, R10 ; /* 0x0000000a0b0b7221 */ /* 0x000fc80000000000 */
        /*0300*/ FADD R4, R11, R4 ; /* 0x000000040b047221 */ /* 0x008fc80000000000 */
        /*0310*/ FADD R5, R5, R4 ; /* 0x0000000405057221 */ /* 0x000fc80000000000 */
        /*0320*/ FADD R6, R6, R5 ; /* 0x0000000506067221 */ /* 0x000fc80000000000 */
        /*0330*/ FADD R13, R7, R6 ; /* 0x00000006070d7221 */ /* 0x000fe20000000000 */
        /*0340*/ @P0 BRA 0x280 ; /* 0xffffff3000000947 */ /* 0x000fea000383ffff */
        /* Outer-loop test on the old R0: repeat only if R0 < -255 (signed), then    */
        /* R0 += 256. With R0 == 0 the predicate is true, so there is no repeat.     */
        /*0350*/ ISETP.GE.AND P0, PT, R0.reuse, -0xff, PT ; /* 0xffffff010000780c */ /* 0x040fe40003f06270 */
        /*0360*/ IADD3 R0, R0, 0x100, RZ ; /* 0x0000010000007810 */ /* 0x000fd60007ffe0ff */
        /*0370*/ @!P0 BRA 0x270 ; /* 0xfffffef000008947 */ /* 0x000fea000383ffff */
        /*0380*/ BSYNC B1 ; /* 0x0000000000017941 */ /* 0x000fea0003800000 */
        /* Write the final sum back to this block's output element. */
        /*0390*/ STG.E [R2.64], R13 ; /* 0x0000000d02007986 */ /* 0x0003e4000c101904 */
        /* Reconverge with the threads that skipped stage 2. */
        /*03a0*/ BSYNC B0 ; /* 0x0000000000007941 */ /* 0x000fea0003800000 */
        /* Final block-wide barrier, then every thread exits. */
        /*03b0*/ BAR.SYNC 0x0 ; /* 0x0000000000007b1d */ /* 0x000fec0000000000 */
        /*03c0*/ EXIT ; /* 0x000000000000794d */ /* 0x000fea0003800000 */
        /* Self-branch and NOPs after EXIT; no branch in this listing targets them. */
        /*03d0*/ BRA 0x3d0; /* 0xfffffff000007947 */ /* 0x000fc0000383ffff */
        /*03e0*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*03f0*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0400*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0410*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0420*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0430*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0440*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0450*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0460*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        /*0470*/ NOP; /* 0x0000000000007918 */ /* 0x000fc00000000000 */
        ..........