I have a simple kernel when profiled using Nsight Compute shows bank conflict, which is correct. I have generated the SASS file from it and want to know if there is any way to detect bank conflicts based on the SASS/PTX files.
The kernel and code segment is shown:
struct s32 {
int a; int b; int c; int d; int e; int f; int g; int h;
};
#define TESTSIZE 1024
template <typename T>
__global__ void test_kernel_conflict(T* d)
{
__shared__ T s[TESTSIZE];
if (threadIdx.x < TESTSIZE / 2)
s[threadIdx.x * 2] = d[threadIdx.x]; // bank conflict
__syncthreads();
if (threadIdx.x >= TESTSIZE / 2)
d[threadIdx.x] = s[threadIdx.x * 2 % TESTSIZE]; // bank conflict
}
The full code can be found here.
and the SASS representation:
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
.elftype @"ET_EXEC"
//--------------------- .text._Z20test_kernel_conflictI3s36EvPT_ --------------------------
.section .text._Z20test_kernel_conflictI3s36EvPT_,"ax",@progbits
.sectionflags @"SHF_BARRIERS=1"
.sectioninfo @"SHI_REGISTERS=17"
.align 128
.global _Z20test_kernel_conflictI3s36EvPT_
.type _Z20test_kernel_conflictI3s36EvPT_,@function
.size _Z20test_kernel_conflictI3s36EvPT_,(.L_x_12 - _Z20test_kernel_conflictI3s36EvPT_)
.other _Z20test_kernel_conflictI3s36EvPT_,@"STO_CUDA_ENTRY STV_DEFAULT"
_Z20test_kernel_conflictI3s36EvPT_:
.text._Z20test_kernel_conflictI3s36EvPT_:
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 61
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 65
/*0010*/ S2R R14, SR_TID.X ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 69
/*0020*/ IMAD.MOV.U32 R3, RZ, RZ, 0x24 ;
/*0030*/ BMOV.32.CLEAR RZ, B0 ;
/*0040*/ BSSY B0, `(.L_x_0) ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 65
/*0050*/ ISETP.GT.U32.AND P0, PT, R14.reuse, 0x1ff, PT ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 69
/*0060*/ IMAD.WIDE.U32 R2, R14.reuse, R3, c[0x0][0x160] ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 68
/*0070*/ ISETP.GE.U32.AND P1, PT, R14, 0x200, PT ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 65
/*0080*/ @P0 BRA `(.L_x_1) ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 66
/*0090*/ LDG.E.SYS R4, [R2] ;
/*00a0*/ LDG.E.SYS R5, [R2+0x4] ;
/*00b0*/ LDG.E.SYS R6, [R2+0x8] ;
/*00c0*/ LDG.E.SYS R7, [R2+0xc] ;
/*00d0*/ LDG.E.SYS R8, [R2+0x10] ;
/*00e0*/ LDG.E.SYS R9, [R2+0x14] ;
/*00f0*/ LDG.E.SYS R10, [R2+0x18] ;
/*0100*/ LDG.E.SYS R11, [R2+0x1c] ;
/*0110*/ LDG.E.SYS R12, [R2+0x20] ;
/*0120*/ SHF.L.U32 R0, R14, 0x1, RZ ;
/*0130*/ IMAD R13, R0, 0x24, RZ ;
/*0140*/ STS.64 [R13], R4 ;
/*0150*/ STS.64 [R13+0x8], R6 ;
/*0160*/ STS.64 [R13+0x10], R8 ;
/*0170*/ STS.64 [R13+0x18], R10 ;
/*0180*/ STS [R13+0x20], R12 ;
.L_x_1:
/*0190*/ BSYNC B0 ;
.L_x_0:
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 67
/*01a0*/ BAR.SYNC 0x0 ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 68
/*01b0*/ @!P1 EXIT ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 69
/*01c0*/ SHF.L.U32 R0, R14, 0x1, RZ ;
/*01d0*/ LOP3.LUT R0, R0, 0x3fe, RZ, 0xc0, !PT ;
/*01e0*/ IMAD R0, R0, 0x24, RZ ;
/*01f0*/ LDS.U.64 R4, [R0] ;
/*0200*/ LDS.U.64 R6, [R0+0x8] ;
/*0210*/ LDS.U.64 R8, [R0+0x10] ;
/*0220*/ LDS.U.64 R10, [R0+0x18] ;
/*0230*/ LDS.U R13, [R0+0x20] ;
/*0240*/ STG.E.SYS [R2], R4 ;
/*0250*/ STG.E.SYS [R2+0x4], R5 ;
/*0260*/ STG.E.SYS [R2+0x8], R6 ;
/*0270*/ STG.E.SYS [R2+0xc], R7 ;
/*0280*/ STG.E.SYS [R2+0x10], R8 ;
/*0290*/ STG.E.SYS [R2+0x14], R9 ;
/*02a0*/ STG.E.SYS [R2+0x18], R10 ;
/*02b0*/ STG.E.SYS [R2+0x1c], R11 ;
/*02c0*/ STG.E.SYS [R2+0x20], R13 ;
//## File "/home/sen/Documents/Development/hpctoolkit/sen_examples/Bank_conflict/bank_conflict.cu", line 70
/*02d0*/ EXIT ;
.L_x_2:
/*02e0*/ BRA `(.L_x_2);
/*02f0*/ NOP;
.L_x_12:
I would want to know is by looking at the SASS file, is there a way to know about the read pattern to the shared memory that is causing this bank conflict.