Compile flags:

-gencode arch=compute_32,code=sm_32 --ptxas-options=-v -m32 -Xcompiler -mfloat-abi=hard

This is what’s in the main body:

__global__ void int_kernel_2_256_(int num_threads, int num_streams, TYPE* in, TYPE* out)

{

uint tid = threadIdx.x + blockIdx.x * blockDim.x;

TYPE x, y;

if(tid < num_threads) {

x = in[tid];

y = in[num_threads + tid];

```
x = x + CONST;
x = x && 0x000FFFFF;
y = y + CONST;
y = y && 0x000FFFFF;
```

…

out[tid] = x;

out[num_threads + tid] = y;

}

}

and in the objdump:

```
Function : _Z16int_kernel_2_256iiPiS_
```

.headerflags @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"

/* 0x08a000b0a0a08c00 */

/*0008*/ MOV R1, c[0x0][0x44]; /* 0x64c03c00089c0006 */

/*0010*/ S2R R0, SR_CTAID.X; /* 0x86400000129c0002 */

/*0018*/ S2R R3, SR_TID.X; /* 0x86400000109c000e */

/*0020*/ IMAD R2, R0, c[0x0][0x28], R3; /* 0x51080c00051c000a */

/*0028*/ ISETP.GE.U32.AND P0, PT, R2, c[0x0][0x140], PT; /* 0x5b601c00281c081e */

/*0030*/ @P0 BRA.U 0x12d8; /* 0x120000095000023c */

/*0038*/ @!P0 ISCADD R4, R2, c[0x0][0x148], 0x2; /* 0x60c0080029200812 */

/* 0x08a0b8a010b8a010 */

/*0048*/ @!P0 IADD R0, R2, c[0x0][0x140]; /* 0x6080000028200802 */

/*0050*/ @!P0 LD R4, [R4]; /* 0xc400000000201010 */

/*0058*/ @!P0 ISCADD R3, R0, c[0x0][0x148], 0x2; /* 0x60c008002920000e */

/*0060*/ @!P0 LD R3, [R3]; /* 0xc400000000200c0c */

/*0068*/ @!P0 IADD R5, R4, 0x5; /* 0xc080000002a01015 */

/*0070*/ @!P0 LOP32I.XOR R4, R5, 0xf0f0f0f0; /* 0x2278787878201410 */

/*0078*/ @!P0 IADD R5, R3, 0x5; /* 0xc080000002a00c15 */

/* 0x08a09c80a010a010 */

/*0088*/ @!P0 IADD R3, R4, 0x5; /* 0xc080000002a0100d */

/*0090*/ @!P0 LOP32I.XOR R4, R5, 0xf0f0f0f0; /* 0x2278787878201410 */

/*0098*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*00a0*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*00a8*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/*00b0*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*00b8*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/* 0x08a09c80a010a010 */

/*00c8*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*00d0*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/*00d8*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*00e0*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*00e8*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*00f0*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/*00f8*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/* 0x08a09c80a010a010 */

/*0108*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*0110*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*0118*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/*0120*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*0128*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*0130*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*0138*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/* 0x08a09c80a010a010 */

/*0148*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*0150*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*0158*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*0160*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/*0168*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*0170*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*0178*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

…

/*1290*/ @!P0 IADD R3, R3, 0x5; /* 0xc080000002a00c0d */

/*1298*/ @!P0 IADD R4, R4, 0x5; /* 0xc080000002a01011 */

/*12a0*/ @!P0 ISCADD R2, R2, c[0x0][0x14c], 0x2; /* 0x60c0080029a0080a */

/*12a8*/ @!P0 LOP32I.XOR R3, R3, 0xf0f0f0f0; /* 0x2278787878200c0c */

/*12b0*/ @!P0 ISCADD R0, R0, c[0x0][0x14c], 0x2; /* 0x60c0080029a00002 */

/*12b8*/ @!P0 LOP32I.XOR R4, R4, 0xf0f0f0f0; /* 0x2278787878201010 */

/* 0x08000000b81000b8 */

/*12c8*/ @!P0 ST [R2], R3; /* 0xe40000000020080c */

/*12d0*/ @!P0 ST [R0], R4; /* 0xe400000000200010 */

/*12d8*/ MOV RZ, RZ; /* 0xe4c03c007f9c03fe */

/*12e0*/ EXIT; /* 0x18000000001c003c */

/*12e8*/ BRA 0x12e8; /* 0x12007ffffc1c003c */

/*12f0*/ NOP; /* 0x85800000001c3c02 */

/*12f8*/ NOP; /* 0x85800000001c3c02 */