1. Is it possible to disable compiler optimizations that discards any code that has no impact on global state? I understand the resulting code will be sub-optimal in resource usage and efficiency.
Yes and no. I was able to minimize compiler optimizations by setting
-Xptxas -O0
in the make process. You can see in the code snippet below that in Case 2 the number of registers used increased by 50% and the line of SASS nearly tripled. But Case 1 is unchanged. I’m not sure of a way to change this.
belt@orion:~/eclipse-workspace/simpleTest/Release$ cuobjdump -sass -res-usage ./simpleTest
Fatbin elf code:
================
arch = sm_70
code version = [1,7]
producer = <unknown>
host = linux
compile_size = 64bit
Resource usage:
Common:
GLOBAL:0
code for sm_70
Fatbin ptx code:
================
arch = sm_70
code version = [6,5]
producer = <unknown>
host = linux
compile_size = 64bit
compressed
ptxasOptions = -O0
Fatbin elf code:
================
arch = sm_70
code version = [1,7]
producer = <unknown>
host = linux
compile_size = 64bit
Resource usage:
Common:
GLOBAL:0
Function _Z5case2iPfS_:
REG:12 STACK:0 SHARED:0 LOCAL:0 CONSTANT[0]:376 TEXTURE:0 SURFACE:0 SAMPLER:0
Function _Z5case1iPfS_:
REG:4 STACK:0 SHARED:0 LOCAL:0 CONSTANT[0]:376 TEXTURE:0 SURFACE:0 SAMPLER:0
code for sm_70
Function : _Z5case2iPfS_
.headerflags @"EF_CUDA_SM70 EF_CUDA_PTX_SM(EF_CUDA_SM70)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x003fde0000000f00 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0020*/ MOV R2, 0x168 ; /* 0x0000016800027802 */
/* 0x003fde0000000f00 */
/*0030*/ LDC.64 R2, c[0x0][R2] ; /* 0x0000000002027b82 */
/* 0x00321e0000000a00 */
/*0040*/ MOV R6, R2 ; /* 0x0000000200067202 */
/* 0x003fde0000000f00 */
/*0050*/ MOV R7, R3 ; /* 0x0000000300077202 */
/* 0x003fde0000000f00 */
/*0060*/ MOV R6, R6 ; /* 0x0000000600067202 */
/* 0x003fde0000000f00 */
/*0070*/ MOV R7, R7 ; /* 0x0000000700077202 */
/* 0x003fde0000000f00 */
/*0080*/ MOV R2, 0x170 ; /* 0x0000017000027802 */
/* 0x003fde0000000f00 */
/*0090*/ LDC.64 R2, c[0x0][R2] ; /* 0x0000000002027b82 */
/* 0x00321e0000000a00 */
/*00a0*/ MOV R8, R2 ; /* 0x0000000200087202 */
/* 0x003fde0000000f00 */
/*00b0*/ MOV R9, R3 ; /* 0x0000000300097202 */
/* 0x003fde0000000f00 */
/*00c0*/ MOV R8, R8 ; /* 0x0000000800087202 */
/* 0x003fde0000000f00 */
/*00d0*/ MOV R9, R9 ; /* 0x0000000900097202 */
/* 0x003fde0000000f00 */
/*00e0*/ MOV R8, R8 ; /* 0x0000000800087202 */
/* 0x003fde0000000f00 */
/*00f0*/ MOV R9, R9 ; /* 0x0000000900097202 */
/* 0x003fde0000000f00 */
/*0100*/ MOV R6, R6 ; /* 0x0000000600067202 */
/* 0x003fde0000000f00 */
/*0110*/ MOV R7, R7 ; /* 0x0000000700077202 */
/* 0x003fde0000000f00 */
/*0120*/ S2R R0, SR_CTAID.X ; /* 0x0000000000007919 */
/* 0x00321e0000002500 */
/*0130*/ MOV R0, R0 ; /* 0x0000000000007202 */
/* 0x003fde0000000f00 */
/*0140*/ MOV R2, c[0x0][0x0] ; /* 0x0000000000027a02 */
/* 0x003fde0000000f00 */
/*0150*/ S2R R3, SR_TID.X ; /* 0x0000000000037919 */
/* 0x00321e0000002100 */
/*0160*/ MOV R3, R3 ; /* 0x0000000300037202 */
/* 0x003fde0000000f00 */
/*0170*/ IMAD R2, R2, R0, R3 ; /* 0x0000000002027224 */
/* 0x003fde00078e0203 */
/*0180*/ IMAD.WIDE R2, R2, 0x4, RZ ; /* 0x0000000402027825 */
/* 0x003fde00078e02ff */
/*0190*/ MOV R4, R2 ; /* 0x0000000200047202 */
/* 0x003fde0000000f00 */
/*01a0*/ MOV R5, R3 ; /* 0x0000000300057202 */
/* 0x003fde0000000f00 */
/*01b0*/ IADD3 R2, P0, R6, R4, RZ ; /* 0x0000000406027210 */
/* 0x003fde0007f1e0ff */
/*01c0*/ IADD3.X R3, R7, R5, RZ, P0, !PT ; /* 0x0000000507037210 */
/* 0x003fde00007fe4ff */
/*01d0*/ MOV R2, R2 ; /* 0x0000000200027202 */
/* 0x003fde0000000f00 */
/*01e0*/ MOV R3, R3 ; /* 0x0000000300037202 */
/* 0x003fde0000000f00 */
/*01f0*/ MOV R2, R2 ; /* 0x0000000200027202 */
/* 0x003fde0000000f00 */
/*0200*/ MOV R3, R3 ; /* 0x0000000300037202 */
/* 0x003fde0000000f00 */
/*0210*/ LDG.E.SYS R0, [R2] ; /* 0x0000000002007381 */
/* 0x00321e00001ee900 */
/*0220*/ IADD3 R2, P0, R8, R4, RZ ; /* 0x0000000408027210 */
/* 0x003fde0007f1e0ff */
/*0230*/ IADD3.X R3, R9, R5, RZ, P0, !PT ; /* 0x0000000509037210 */
/* 0x003fde00007fe4ff */
/*0240*/ MOV R2, R2 ; /* 0x0000000200027202 */
/* 0x003fde0000000f00 */
/*0250*/ MOV R3, R3 ; /* 0x0000000300037202 */
/* 0x003fde0000000f00 */
/*0260*/ MOV R2, R2 ; /* 0x0000000200027202 */
/* 0x003fde0000000f00 */
/*0270*/ MOV R3, R3 ; /* 0x0000000300037202 */
/* 0x003fde0000000f00 */
/*0280*/ STG.E.SYS [R2], R0 ; /* 0x0000000002007386 */
/* 0x0033de000010e900 */
/*0290*/ EXIT ; /* 0x000000000000794d */
/* 0x003fde0003800000 */
/*02a0*/ BRA 0x2a0; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/*02b0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*02c0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*02d0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*02e0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*02f0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
........................
Function : _Z5case1iPfS_
.headerflags @"EF_CUDA_SM70 EF_CUDA_PTX_SM(EF_CUDA_SM70)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x003fde0000000f00 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0020*/ EXIT ; /* 0x000000000000794d */
/* 0x003fde0003800000 */
/*0030*/ BRA 0x30; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/*0040*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0050*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0060*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0070*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
........................