where is the entry parameter?

Where is the kernel's entry parameter stored?
In shared memory, constant memory, or a register?

C/C++ code

// Kernel: copies the first `num` bytes of the string "Hello CUDA!" into `result`.
//
//   result - destination buffer written by the kernel (one byte per iteration)
//   num    - number of bytes to copy
//
// NOTE(review): the loop is bounded only by `num`; the source array holds
// 12 bytes (11 characters plus the terminating NUL), so callers presumably
// pass num <= 12 -- confirm against the launch code.
// NOTE(review): the loop counter `i` is declared __shared__, so it is a single
// variable for the whole block; presumably the kernel is launched with one
// thread -- confirm.
__global__ static void HelloCUDA(char* result, int num)
{
    __shared__ int i;
    i = 0;
    char p_HelloCUDA[] = "Hello CUDA!";
    for (i = 0; i < num; i++) {
        result[i] = p_HelloCUDA[i];
    }
}

ptx code

// 12-byte constant-space array: the ASCII bytes of "Hello CUDA!" followed by a NUL terminator.
.const .align 1 .b8 __constant432[12] = {0x48,0x65,0x6c,0x6c,0x6f,0x20,0x43,0x55,0x44,0x41,0x21,0x0}

;

// Entry point _Z9HelloCUDAPci.
// Copies the 12-byte constant array into a per-thread local array, then copies
// `num` bytes from that local array to the global buffer `result`, and finally
// records the loop counter's end value in the shared variable `i`.
// At the PTX level both kernel arguments live in the .param state space and are
// fetched with ld.param.
.entry _Z9HelloCUDAPci
{
// Virtual register declarations: 16-bit, 32-bit and predicate registers.
.reg .u16 %rh<3>;
.reg .u32 %r<16>;
.reg .pred %p<4>;
// Kernel arguments: `result` (32-bit address) and `num` (signed 32-bit).
.param .u32 __cudaparm__Z9HelloCUDAPci_result;
.param .s32 __cudaparm__Z9HelloCUDAPci_num;
// 12-byte array in the .local state space (the copy of the string).
.local .align 4 .b8 __cuda___cuda_p_HelloCUDA_168[12];
// 32-bit variable `i` in the .shared state space.
.shared .s32 i;
.loc    14    15    0

$LBB1__Z9HelloCUDAPci:
// Copy the 12 constant bytes into the local array as three 32-bit words
// (offsets 0, 4 and 8).
mov.u32 %r1, __constant432; //
mov.u32 %r2, __cuda___cuda_p_HelloCUDA_168; //
ld.const.u32 %r3, [%r1+0]; // id:17 not_variable+0x0
st.local.u32 [%r2+0], %r3; // id:18 __cuda___cuda_p_HelloCUDA_168+0x0
ld.const.u32 %r4, [%r1+4]; // id:17 not_variable+0x0
st.local.u32 [%r2+4], %r4; // id:18 __cuda___cuda_p_HelloCUDA_168+0x0
ld.const.u32 %r5, [%r1+8]; // id:17 not_variable+0x0
st.local.u32 [%r2+8], %r5; // id:18 __cuda___cuda_p_HelloCUDA_168+0x0
.loc 14 20 0
// Loop guard: %r6 = 0, %r7 = num; if num <= 0 skip the loop and go to $Lt_0_9.
mov.s32 %r6, 0; //
ld.param.s32 %r7, [__cudaparm__Z9HelloCUDAPci_num]; // id:16 __cudaparm__Z9HelloCUDAPci_num+0x0
mov.u32 %r8, 0; //
setp.le.s32 %p1, %r7, %r8; //
@%p1 bra $Lt_0_9; //
// Loop setup: %r10 = read cursor (start of the local array),
// %r12 = end address (local array base + num), %r13 = write cursor (`result`).
// %r9 and %r14 are copies of num; %r14 is not read again in this listing.
mov.s32 %r9, %r7; //
mov.u32 %r10, __cuda___cuda_p_HelloCUDA_168; //
mov.u32 %r11, __cuda___cuda_p_HelloCUDA_168; //
add.u32 %r12, %r7, %r11; //
ld.param.u32 %r13, [__cudaparm__Z9HelloCUDAPci_result]; // id:19 __cudaparm__Z9HelloCUDAPci_result+0x0
mov.s32 %r14, %r9; //
$Lt_0_7:
// Loop body line 20, nesting depth: 1, estimated iterations: unknown
.loc 14 21 0
// Copy one byte from the local array to global memory, advance both cursors
// by one, and repeat until the read cursor reaches the end address.
ld.local.s8 %rh1, [%r10+0]; // id:20 __cuda___cuda_p_HelloCUDA_168+0x0
st.global.s8 [%r13+0], %rh1; // id:21
add.u32 %r13, %r13, 1; //
add.u32 %r10, %r10, 1; //
setp.ne.s32 %p2, %r10, %r12; //
@%p2 bra $Lt_0_7; //
// Loop finished: store num into the shared variable `i`.
st.shared.s32 [i], %r7; // id:22 i+0x0
bra.uni $Lt_0_5; //
$Lt_0_9:
// Loop skipped (num <= 0): store 0 into the shared variable `i`.
st.shared.s32 [i], %r6; // id:22 i+0x0
$Lt_0_5:
.loc 14 23 0
exit; //
$LDWend__Z9HelloCUDAPci:
} // _Z9HelloCUDAPci

cubin code

architecture {sm_10}
abiversion {1}
modname {cubin}
consts {
name = __constant432
segname = const
segnum = 0
offset = 0
bytes = 12
mem {
0x6c6c6548 0x5543206f 0x00214144
}
}
code {
name = _Z9HelloCUDAPci
lmem = 12
smem = 28
reg = 3
bar = 0
bincode {
0x10000001 0x2400c780 0xd0000001 0x60c00780
0x10000201 0x2400c780 0xd0000801 0x60c00780
0x10000401 0x2400c780 0x307ccbfd 0x6c20c7c8
0xd0001001 0x60c00780 0x10014003 0x00000280
0x1000f801 0x0403c780 0x1000c805 0x0423c780
0x00000005 0xc0000780 0xd4000009 0x40200780
0x20018001 0x00000003 0xd00e0209 0xa0200780
0x3000cbfd 0x6c2147c8 0x20018205 0x00000003
0x1000a003 0x00000280 0x1000ca01 0x0423c780
0x00000c01 0xe4200780 0x30000003 0x00000780
0x00000c01 0xe43f0781
}
}

Where are the `__global__` function's parameters stored?

My guess is that the parameters go from global memory to constant memory or shared memory, are broadcast from there, and then end up in each thread's registers.

Is that right?

Is there an on-chip parameter register stack (PRS)?
Is the parameter transfer path host -> device -> PRS?

It's shared memory.