Digging into this problem further and disassembling the original code:
// Kernel function to propagate product; Since it's log space product is sum
__global__
void prd(double *y, int n, double *x, int m)
{
assert(n>m);
long index = blockIdx.x * blockDim.x + threadIdx.x;
long stride = blockDim.x * gridDim.x;
do
{
y[index] += x[index%(1L<<m)];
index += stride;
}
while(index < (1L<<n));
}
it appears that the culprit is the left shift operator 1L<<m
. Replacing it with a constant or even 1L<<2
removes the floating point operations. Furthermore, disassembling the code shows a reciprocal MUFU (MUlti-Function Unit) operation MUFU.RCP R4, R4;
(line 0260), which apparently is some kind of shift optimization (really?!!). Any help beyond this will be highly appreciated.
$ cuobjdump compute --dump-sass
...
code for sm_70
Function : _Z3prdPdiS_i
.headerflags @"EF_CUDA_SM70 EF_CUDA_PTX_SM(EF_CUDA_SM70)"
/*0000*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0010*/ IMAD.U32 R1, RZ, RZ, c[0x0][0x28]; /* 0x00000a00ff017624 */
/* 0x000fd000078e00ff */
/*0020*/ IMAD.U32 R0, RZ, RZ, c[0x0][0x168]; /* 0x00005a00ff007624 */
/* 0x000fca00078e00ff */
/*0030*/ ISETP.GT.AND P0, PT, R0, c[0x0][0x178], PT, !PT; /* 0x00005e0000007a0c */
/* 0x000fd80003f042f0 */
/*0040*/ @P0 BRA 0x110; /* 0x000000c000000947 */
/* 0x000fea0003800000 */
/*0050*/ MOV R4, 0x0; /* 0x0000000000047802 */
/* 0x000fe20000000f00 */
/*0060*/ IMAD.U32 R8, RZ, RZ, 0x87; /* 0x00000087ff087424 */
/* 0x000fe200078e00ff */
/*0070*/ MOV R5, 0x0; /* 0x0000000000057802 */
/* 0x000fe20000000f00 */
/*0080*/ IMAD.U32 R12, RZ, RZ, 0x1; /* 0x00000001ff0c7424 */
/* 0x000fe200078e00ff */
/*0090*/ MOV R6, 0x0; /* 0x0000000000067802 */
/* 0x000fe20000000f00 */
/*00a0*/ IMAD.U32 R13, RZ, RZ, RZ; /* 0x000000ffff0d7224 */
/* 0x000fe200078e00ff */
/*00b0*/ MOV R7, 0x0; /* 0x0000000000077802 */
/* 0x000fe40000000f00 */
/*00c0*/ MOV R10, 0x0; /* 0x00000000000a7802 */
/* 0x000fe40000000f00 */
/*00d0*/ MOV R11, 0x0; /* 0x00000000000b7802 */
/* 0x000fc40000000f00 */
/*00e0*/ MOV R20, 0x0; /* 0x0000000000147802 */
/* 0x000fe40000000f00 */
/*00f0*/ MOV R21, 0x0; /* 0x0000000000157802 */
/* 0x000fd00000000f00 */
/*0100*/ CALL.ABS.NOINC 0x0; /* 0x0000000000007943 */
/* 0x000fea0003c00000 */
/*0110*/ S2R R0, SR_CTAID.X; /* 0x0000000000007919 */
/* 0x000e220000002500 */
/*0120*/ S2R R2, SR_TID.X; /* 0x0000000000027919 */
/* 0x000e220000002100 */
/*0130*/ IMAD.U32 R12, RZ, RZ, 0x1; /* 0x00000001ff0c7424 */
/* 0x000fe400078e00ff */
/*0140*/ IMAD.U32 R15, RZ, RZ, c[0x0][0xc]; /* 0x00000300ff0f7624 */
/* 0x000fe400078e00ff */
/*0150*/ IMAD.U32 R9, RZ, RZ, RZ; /* 0x000000ffff097224 */
/* 0x000fe200078e00ff */
/*0160*/ SHF.L.U64.HI R8, R12.reuse, c[0x0][0x178], RZ; /* 0x00005e000c087a19 */
/* 0x040fe400000102ff */
/*0170*/ SHF.L.U32 R10, R12, c[0x0][0x178], RZ; /* 0x00005e000c0a7a19 */
/* 0x000fc400000006ff */
/*0180*/ SHF.L.U64.HI R11, R12.reuse, c[0x0][0x168], RZ; /* 0x00005a000c0b7a19 */
/* 0x040fe400000102ff */
/*0190*/ SHF.L.U32 R12, R12, c[0x0][0x168], RZ; /* 0x00005a000c0c7a19 */
/* 0x000fe200000006ff */
/*01a0*/ IMAD R0, R0, c[0x0][0x0], R2; /* 0x0000000000007a24 */
/* 0x001fd000078e0202 */
/*01b0*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*01c0*/ ISETP.EQ.U32.AND P0, PT, RZ, RZ, PT, !PT; /* 0x000000ffff00720c */
/* 0x000fe40003f020f0 */
/*01d0*/ LOP3.LUT R2, R9, R8, RZ, 0xfc, !PT; /* 0x0000000809027212 */
/* 0x000fc800078efcff */
/*01e0*/ ISETP.EQ.AND.EX P0, PT, R2, RZ, PT, P0; /* 0x000000ff0200720c */
/* 0x000fe20003f02300 */
/*01f0*/ BSSY B0, 0x380; /* 0x0000018000007945 */
/* 0x000ff60003800000 */
/*0200*/ @P0 BRA 0x250; /* 0x0000004000000947 */
/* 0x000fea0003800000 */
/*0210*/ MOV R14, 0x230; /* 0x00000230000e7802 */
/* 0x000fd00000000f00 */
/*0220*/ CALL.REL.NOINC 0x470; /* 0x0000024000007944 */
/* 0x000fea0003c00000 */
/*0230*/ IMAD.U32 R3, RZ, RZ, R4; /* 0x000000ffff037224 */
/* 0x000fe200078e0004 */
/*0240*/ BRA 0x370; /* 0x0000012000007947 */
/* 0x000fee0003800000 */
/*0250*/ I2F.U32.RP R4, R10; /* 0x0000000a00047306 */
/* 0x000e240000209000 */
/*0260*/ <b>MUFU.RCP R4, R4;</b> /* 0x0000000400047308 */
/* 0x001e240000001000 */
/*0270*/ IADD3 R5, R4, 0xffffffe, RZ; /* 0x0ffffffe04057810 */
/* 0x001fcc0007ffe0ff */
/*0280*/ F2I.FTZ.U32.TRUNC.NTZ R3, R5; /* 0x0000000500037305 */
/* 0x000062000021f000 */
/*0290*/ MOV R2, RZ; /* 0x000000ff00027202 */
/* 0x000fe20000000f00 */
/*02a0*/ IMAD R13, RZ, RZ, -R3; /* 0x000000ffff0d7224 */
/* 0x002fc800078e0a03 */
/*02b0*/ IMAD R13, R13, R10, RZ; /* 0x0000000a0d0d7224 */
/* 0x000fc800078e02ff */
/*02c0*/ IMAD.WIDE.U32 R2, R3, R13, R2; /* 0x0000000d03027225 */
/* 0x000fd000078e0002 */
/*02d0*/ IMAD.WIDE.U32 R2, R3, R0, RZ; /* 0x0000000003027225 */
/* 0x000fcc00078e00ff */
/*02e0*/ IMAD R3, RZ, RZ, -R3; /* 0x000000ffff037224 */
/* 0x000fc800078e0a03 */
/*02f0*/ IMAD R3, R10, R3, R0; /* 0x000000030a037224 */
/* 0x000fca00078e0200 */
/*0300*/ ISETP.GE.U32.AND P0, PT, R3, R10, PT, !PT; /* 0x0000000a0300720c */
/* 0x000fe40003f060f0 */
/*0310*/ ISETP.EQ.U32.AND P1, PT, R10, RZ, PT, !PT; /* 0x000000ff0a00720c */
/* 0x000fd40003f220f0 */
/*0320*/ @P0 IMAD R3, R3, 0x1, -R10; /* 0x0000000103030824 */
/* 0x000fca00078e0a0a */
/*0330*/ ISETP.GE.U32.AND P0, PT, R3, R10, PT, !PT; /* 0x0000000a0300720c */
/* 0x000fe20003f060f0 */
/*0340*/ IMAD.U32 R5, RZ, RZ, RZ; /* 0x000000ffff057224 */
/* 0x000fd600078e00ff */
/*0350*/ @P0 IMAD R3, R3, 0x1, -R10; /* 0x0000000103030824 */
/* 0x000fca00078e0a0a */
/*0360*/ @P1 LOP3.LUT R3, RZ, R10, RZ, 0x33, !PT; /* 0x0000000aff031212 */
/* 0x000fd000078e33ff */
/*0370*/ BSYNC B0; /* 0x0000000000007941 */
/* 0x000fea0003800000 */
/*0380*/ LEA R6, P1, R3.reuse, c[0x0][0x170], 0x3; /* 0x00005c0003067a11 */
/* 0x040fe400078218ff */
/*0390*/ LEA R2, P0, R0, c[0x0][0x160], 0x3; /* 0x0000580000027a11 */
/* 0x000fe400078018ff */
/*03a0*/ LEA.HI.X R7, R3, c[0x0][0x174], R5, 0x3, P1; /* 0x00005d0003077a11 */
/* 0x000fe400008f1c05 */
/*03b0*/ LEA.HI.X R3, R0, c[0x0][0x164], R9, 0x3, P0; /* 0x0000590000037a11 */
/* 0x000fcc00000f1c09 */
/*03c0*/ LDG.E.64.SYS R6, [R6]; /* 0x0000000006067381 */
/* 0x000e2400001eeb00 */
/*03d0*/ LDG.E.64.SYS R4, [R2]; /* 0x0000000002047381 */
/* 0x000e2200001eeb00 */
/*03e0*/ IMAD R13, R15, c[0x0][0x0], RZ; /* 0x000000000f0d7a24 */
/* 0x000fca00078e02ff */
/*03f0*/ IADD3 R0, P0, R13, R0, RZ; /* 0x000000000d007210 */
/* 0x000fca0007f1e0ff */
/*0400*/ IMAD.X R9, RZ, RZ, R9, P0; /* 0x000000ffff097224 */
/* 0x000fe200000e0609 */
/*0410*/ ISETP.LT.U32.AND P0, PT, R0, R12, PT, !PT; /* 0x0000000c0000720c */
/* 0x000fc80003f010f0 */
/*0420*/ ISETP.LT.AND.EX P0, PT, R9, R11, PT, P0; /* 0x0000000b0900720c */
/* 0x000fe20003f01300 */
/*0430*/ DADD R4, R4, R6; /* 0x0000000004047229 */
/* 0x001e120000000006 */
/*0440*/ STG.E.64.SYS [R2], R4; /* 0x0000000402007386 */
/* 0x0011e4000010eb00 */
/*0450*/ @P0 BRA 0x1b0; /* 0xfffffd5000000947 */
/* 0x001fea000383ffff */
/*0460*/ EXIT; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/*0470*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0480*/ IADD3 R16, P0, RZ, -R10, RZ; /* 0x8000000aff107210 */
/* 0x000fe40007f1e0ff */
/*0490*/ ISETP.LT.AND P2, PT, R8, RZ, PT, !PT; /* 0x000000ff0800720c */
/* 0x000fc60003f412f0 */
/*04a0*/ IMAD.X R13, RZ, RZ, ~R8, P0; /* 0x000000ffff0d7224 */
/* 0x000fe200000e0e08 */
/*04b0*/ ISETP.LT.AND P1, PT, R8, RZ, PT, !PT; /* 0x000000ff0800720c */
/* 0x000fc80003f212f0 */
/*04c0*/ SEL R13, R13, R8, P2; /* 0x000000080d0d7207 */
/* 0x000fe40001000000 */
/*04d0*/ SEL R16, R16, R10, P1; /* 0x0000000a10107207 */
/* 0x000fc60000800000 */
/*04e0*/ IMAD.U32 R17, RZ, RZ, R13; /* 0x000000ffff117224 */
/* 0x000fcc00078e000d */
/*04f0*/ I2F.U64.RP R6, R16; /* 0x0000001000067312 */
/* 0x0000640000309000 */
/*0500*/ MUFU.RCP R6, R6; /* 0x0000000600067308 */
/* 0x002e240000001000 */
/*0510*/ IADD3 R2, R6, 0x1ffffffe, RZ; /* 0x1ffffffe06027810 */
/* 0x001fcc0007ffe0ff */
/*0520*/ F2I.U64.TRUNC R2, R2; /* 0x0000000200027311 */
/* 0x000e24000020d800 */
/*0530*/ IMAD.WIDE.U32 R4, R2.reuse, R16.reuse, RZ; /* 0x0000001002047225 */
/* 0x0c1fe400078e00ff */
/*0540*/ IMAD R7, R2, R16, RZ; /* 0x0000001002077224 */
/* 0x000fc800078e02ff */
/*0550*/ IMAD R5, R2, R13, R5; /* 0x0000000d02057224 */
/* 0x000fe200078e0205 */
/*0560*/ IADD3 R7, P0, -R7, RZ, RZ; /* 0x000000ff07077210 */
/* 0x000fc60007f1e1ff */
/*0570*/ IMAD R18, R3, R16, R5; /* 0x0000001003127224 */
/* 0x000fe400078e0205 */
/*0580*/ IMAD.WIDE.U32 R4, R2, R7, RZ; /* 0x0000000702047225 */
/* 0x000fc600078e00ff */
/*0590*/ IADD3.X R17, RZ, ~R18, RZ, P0, !PT; /* 0x80000012ff117210 */
/* 0x000fc600007fe4ff */
/*05a0*/ IMAD.U32 R4, RZ, RZ, R5; /* 0x000000ffff047224 */
/* 0x000fe400078e0005 */
/*05b0*/ IMAD.U32 R5, RZ, RZ, R2; /* 0x000000ffff057224 */
/* 0x000fc800078e0002 */
/*05c0*/ IMAD.WIDE.U32 R4, P1, R2, R17, R4; /* 0x0000001102047225 */
/* 0x000fd00007820004 */
/*05d0*/ IMAD.WIDE.U32 R4, P2, R3.reuse, R7, R4; /* 0x0000000703047225 */
/* 0x040fe40007840004 */
/*05e0*/ IMAD R7, R3, R17, RZ; /* 0x0000001103077224 */
/* 0x000fc800078e02ff */
/*05f0*/ IMAD.U32 R18, RZ, RZ, R5; /* 0x000000ffff127224 */
/* 0x000fe400078e0005 */
/*0600*/ IMAD.WIDE.U32 R4, R3, R17, RZ; /* 0x0000001103047225 */
/* 0x000fc600078e00ff */
/*0610*/ IADD3 R18, P0, R7, R18, RZ; /* 0x0000001207127210 */
/* 0x000fc60007f1e0ff */
/*0620*/ IMAD.X R2, R5, 0x1, R3, P1; /* 0x0000000105027824 */
/* 0x000fe400008e0603 */
/*0630*/ IMAD R4, R18.reuse, R16.reuse, RZ; /* 0x0000001012047224 */
/* 0x0c0fe400078e02ff */
/*0640*/ IMAD.WIDE.U32 R6, R18, R16, RZ; /* 0x0000001012067225 */
/* 0x000fe200078e00ff */
/*0650*/ IADD3.X R17, RZ, RZ, R2, P0, P2; /* 0x000000ffff117210 */
/* 0x000fe400007e4402 */
/*0660*/ IADD3 R4, P0, -R4, RZ, RZ; /* 0x000000ff04047210 */
/* 0x000fc60007f1e1ff */
/*0670*/ IMAD R7, R18.reuse, R13, R7; /* 0x0000000d12077224 */
/* 0x040fe400078e0207 */
/*0680*/ IMAD.WIDE.U32 R2, R18, R4, RZ; /* 0x0000000412027225 */
/* 0x000fe400078e00ff */
/*0690*/ IMAD R7, R17, R16, R7; /* 0x0000001011077224 */
/* 0x000fc800078e0207 */
/*06a0*/ IMAD.U32 R2, RZ, RZ, R3; /* 0x000000ffff027224 */
/* 0x000fe400078e0003 */
/*06b0*/ IMAD.X R7, RZ, RZ, ~R7, P0; /* 0x000000ffff077224 */
/* 0x000fe400000e0e07 */
/*06c0*/ IMAD.U32 R3, RZ, RZ, R18; /* 0x000000ffff037224 */
/* 0x000fc800078e0012 */
/*06d0*/ IMAD.WIDE.U32 R2, P1, R18, R7, R2; /* 0x0000000712027225 */
/* 0x000fe20007820002 */
/*06e0*/ ISETP.LT.AND P3, PT, R9, RZ, PT, !PT; /* 0x000000ff0900720c */
/* 0x000fce0003f612f0 */
/*06f0*/ IMAD.WIDE.U32 R2, P0, R17, R4, R2; /* 0x0000000411027225 */
/* 0x000fe20007800002 */
/*0700*/ IADD3 R6, P2, RZ, -R0, RZ; /* 0x80000000ff067210 */
/* 0x000fca0007f5e0ff */
/*0710*/ IMAD R2, R17, R7, RZ; /* 0x0000000711027224 */
/* 0x000fe200078e02ff */
/*0720*/ SEL R6, R6, R0, P3; /* 0x0000000006067207 */
/* 0x000fc80001800000 */
/*0730*/ IADD3 R19, P3, R2, R3, RZ; /* 0x0000000302137210 */
/* 0x000fe20007f7e0ff */
/*0740*/ IMAD.WIDE.U32 R2, R17, R7, RZ; /* 0x0000000711027225 */
/* 0x000fe200078e00ff */
/*0750*/ ISETP.LT.AND P4, PT, R9, RZ, PT, !PT; /* 0x000000ff0900720c */
/* 0x000fc60003f812f0 */
/*0760*/ IMAD.WIDE.U32 R4, R19, R6, RZ; /* 0x0000000613047225 */
/* 0x000fe200078e00ff */
/*0770*/ IADD3.X R18, RZ, ~R9, RZ, P2, !PT; /* 0x80000009ff127210 */
/* 0x000fe200017fe4ff */
/*0780*/ IMAD.X R17, R3, 0x1, R17, P1; /* 0x0000000103117824 */
/* 0x000fc600008e0611 */
/*0790*/ SEL R18, R18, R9, P4; /* 0x0000000912127207 */
/* 0x000fe20002000000 */
/*07a0*/ IMAD.U32 R4, RZ, RZ, R5; /* 0x000000ffff047224 */
/* 0x000fe400078e0005 */
/*07b0*/ IMAD.U32 R5, RZ, RZ, RZ; /* 0x000000ffff057224 */
/* 0x000fe200078e00ff */
/*07c0*/ IADD3.X R17, RZ, RZ, R17, P3, P0; /* 0x000000ffff117210 */
/* 0x000fc60001fe0411 */
/*07d0*/ IMAD.WIDE.U32 R4, R19, R18, R4; /* 0x0000001213047225 */
/* 0x000fd000078e0004 */
/*07e0*/ IMAD.WIDE.U32 R2, P1, R17.reuse, R6, R4; /* 0x0000000611027225 */
/* 0x040fe40007820004 */
/*07f0*/ IMAD R7, R17, R18, RZ; /* 0x0000001211077224 */
/* 0x000fc800078e02ff */
/*0800*/ IMAD.U32 R4, RZ, RZ, R3; /* 0x000000ffff047224 */
/* 0x000fe400078e0003 */
/*0810*/ IMAD.WIDE.U32 R2, R17, R18, RZ; /* 0x0000001211027225 */
/* 0x000fc600078e00ff */
/*0820*/ IADD3 R7, P0, R7, R4, RZ; /* 0x0000000407077210 */
/* 0x000fc60007f1e0ff */
/*0830*/ IMAD.U32 R4, RZ, RZ, R3; /* 0x000000ffff047224 */
/* 0x000fe400078e0003 */
/*0840*/ IMAD.WIDE.U32 R2, R7, R16, RZ; /* 0x0000001007027225 */
/* 0x000fe400078e00ff */
/*0850*/ IMAD.X R4, RZ, RZ, R4, P1; /* 0x000000ffff047224 */
/* 0x000fc800008e0604 */
/*0860*/ IMAD.X R5, RZ, RZ, R4, P0; /* 0x000000ffff057224 */
/* 0x000fe400000e0604 */
/*0870*/ IMAD R4, R7.reuse, R16, RZ; /* 0x0000001007047224 */
/* 0x040fe400078e02ff */
/*0880*/ IMAD R3, R7, R13, R3; /* 0x0000000d07037224 */
/* 0x000fc600078e0203 */
/*0890*/ IADD3 R7, P1, -R4, R6, RZ; /* 0x0000000604077210 */
/* 0x000fe20007f3e1ff */
/*08a0*/ IMAD R3, R5, R16, R3; /* 0x0000001005037224 */
/* 0x000fc600078e0203 */
/*08b0*/ ISETP.GE.U32.AND P0, PT, R7, R16, PT, !PT; /* 0x000000100700720c */
/* 0x000fe40003f060f0 */
/*08c0*/ IADD3.X R18, ~R3, R18, RZ, P1, !PT; /* 0x0000001203127210 */
/* 0x000fe40000ffe5ff */
/*08d0*/ IADD3 R2, P1, R7, -R16, RZ; /* 0x8000001007027210 */
/* 0x000fe40007f3e0ff */
/*08e0*/ ISETP.GE.U32.AND.EX P0, PT, R18, R13, PT, P0; /* 0x0000000d1200720c */
/* 0x000fc60003f06100 */
/*08f0*/ IMAD.X R3, R18, 0x1, ~R13, P1; /* 0x0000000112037824 */
/* 0x000fe200008e0e0d */
/*0900*/ SEL R2, R2, R7, P0; /* 0x0000000702027207 */
/* 0x000fc80000000000 */
/*0910*/ SEL R3, R3, R18, P0; /* 0x0000001203037207 */
/* 0x000fe40000000000 */
/*0920*/ ISETP.GE.U32.AND P0, PT, R2.reuse, R16.reuse, PT, !PT; /* 0x000000100200720c */
/* 0x0c0fe40003f060f0 */
/*0930*/ IADD3 R4, P1, R2, -R16, RZ; /* 0x8000001002047210 */
/* 0x000fe40007f3e0ff */
/*0940*/ ISETP.GE.U32.AND.EX P0, PT, R3, R13, PT, P0; /* 0x0000000d0300720c */
/* 0x000fc60003f06100 */
/*0950*/ IMAD.X R5, R3, 0x1, ~R13, P1; /* 0x0000000103057824 */
/* 0x000fe200008e0e0d */
/*0960*/ SEL R4, R4, R2, P0; /* 0x0000000204047207 */
/* 0x000fc80000000000 */
/*0970*/ SEL R5, R5, R3, P0; /* 0x0000000305057207 */
/* 0x000fe40000000000 */
/*0980*/ IADD3 R2, P1, RZ, -R4.reuse, RZ; /* 0x80000004ff027210 */
/* 0x080fe40007f3e0ff */
/*0990*/ ISETP.LT.AND P2, PT, R9.reuse, RZ, PT, !PT; /* 0x000000ff0900720c */
/* 0x040fe40003f412f0 */
/*09a0*/ ISETP.LT.AND P3, PT, R9, RZ, PT, !PT; /* 0x000000ff0900720c */
/* 0x000fe20003f612f0 */
/*09b0*/ IMAD.X R3, RZ, RZ, ~R5, P1; /* 0x000000ffff037224 */
/* 0x000fe200008e0e05 */
/*09c0*/ SEL R4, R2, R4, P2; /* 0x0000000402047207 */
/* 0x000fe20001000000 */
/*09d0*/ IMAD.U32 R2, RZ, RZ, R14; /* 0x000000ffff027224 */
/* 0x000fe200078e000e */
/*09e0*/ ISETP.EQ.U32.AND P0, PT, R10, RZ, PT, !PT; /* 0x000000ff0a00720c */
/* 0x000fc40003f020f0 */
/*09f0*/ SEL R5, R3, R5, P3; /* 0x0000000503057207 */
/* 0x000fe20001800000 */
/*0a00*/ IMAD.U32 R3, RZ, RZ, 0x0; /* 0x00000000ff037424 */
/* 0x000fe200078e00ff */
/*0a10*/ ISETP.EQ.AND.EX P0, PT, R8, RZ, PT, P0; /* 0x000000ff0800720c */
/* 0x000fc80003f02300 */
/*0a20*/ SEL R4, R4, 0xffffffff, !P0; /* 0xffffffff04047807 */
/* 0x000fe40004000000 */
/*0a30*/ SEL R5, R5, 0xffffffff, !P0; /* 0xffffffff05057807 */
/* 0x000fe20004000000 */
/*0a40*/ RET.REL.NODEC R2 0x0; /* 0xfffff5b002007950 */
/* 0x000fee0003c3ffff */
/*0a50*/ BRA 0xa50; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/*0a60*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0a70*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
.......................