Hello,
I am currently trying to get cuRAND to run with NVRTC. I built the program and I am getting the following PTX code. Can anybody tell me why it is invalid?
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-19856038
// Cuda compilation tools, release 7.5, V7.5.17
// Based on LLVM 3.4svn
//
.version 4.3
.target sm_20
.address_size 64
// .globl rngSetupStates
// Prototype only: `.extern` declares the function without a body, and no
// definition of this symbol is visible in this listing. It takes four 64-bit
// parameters and has no return parameter.
// NOTE(review): the mangled name presumably corresponds to
//   curand_init(unsigned long long, unsigned long long, unsigned long long,
//               curandStateXORWOW *)
// -- confirm against the cuRAND device header in use.
// NOTE(review): if this symbol is still unresolved when the module is loaded
// (i.e. nothing is linked in that defines it), that is a likely reason the
// PTX is rejected -- confirm how the module is loaded/linked.
.extern .func _Z11curand_inityyyP17curandStateXORWOW
(
.param .b64 _Z11curand_inityyyP17curandStateXORWOW_param_0,
.param .b64 _Z11curand_inityyyP17curandStateXORWOW_param_1,
.param .b64 _Z11curand_inityyyP17curandStateXORWOW_param_2,
.param .b64 _Z11curand_inityyyP17curandStateXORWOW_param_3
)
;
// Kernel entry point rngSetupStates.
//
// Each thread makes one call to _Z11curand_inityyyP17curandStateXORWOW with:
//   arg0 = zero-extended (nctaid.x * param_1 + ctaid.x)
//   arg1 = zero-extended tid.x
//   arg2 = 0
//   arg3 = param_0 + (ntid.x * ctaid.x + tid.x) * 48
//
// Parameters:
//   rngSetupStates_param_0 (.u64) : base address that the per-thread byte
//                                   offset is added to (becomes arg3).
//   rngSetupStates_param_1 (.u32) : 32-bit value multiplied by nctaid.x when
//                                   forming arg0.
// NOTE(review): the stride 48 is presumably sizeof(curandStateXORWOW), and
// arg0/arg1/arg2 presumably map to seed/subsequence/offset -- confirm against
// the original CUDA source and the cuRAND header.
.visible .entry rngSetupStates(
.param .u64 rngSetupStates_param_0,
.param .u32 rngSetupStates_param_1
)
{
// Virtual register pools: %r0..%r7 (32-bit) and %rd0..%rd6 (64-bit).
.reg .b32 %r<8>;
.reg .b64 %rd<7>;
// Load the two kernel arguments.
ld.param.u64 %rd1, [rngSetupStates_param_0];
ld.param.u32 %r1, [rngSetupStates_param_1];
// Read the thread/block special registers for the x dimension.
mov.u32 %r2, %tid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %ntid.x;
// %r5 = ntid.x * ctaid.x + tid.x  (flat x index of this thread in the grid)
mad.lo.s32 %r5, %r4, %r3, %r2;
mov.u32 %r6, %nctaid.x;
// %r7 = nctaid.x * param_1 + ctaid.x  (low 32 bits of the product are kept)
mad.lo.s32 %r7, %r6, %r1, %r3;
// Widen the 32-bit values to 64 bits with zero extension for the call.
cvt.u64.u32 %rd2, %r7;
cvt.u64.u32 %rd3, %r2;
// %rd4 = (signed) %r5 * 48 as a 64-bit byte offset; %rd5 = param_0 + %rd4.
mul.wide.s32 %rd4, %r5, 48;
add.s64 %rd5, %rd1, %rd4;
// Constant 0 passed as the third call argument.
mov.u64 %rd6, 0;
// The braces below open a nested scope that holds the outgoing .param
// variables for the call; the "<end>}" and "//{" lines are comments only.
// Callseq Start 0
{
// temp_param_reg is declared here but not referenced anywhere in this listing.
.reg .b32 temp_param_reg;
// <end>}
// Stage the four 64-bit arguments into param0..param3.
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b64 param1;
st.param.b64 [param1+0], %rd3;
.param .b64 param2;
st.param.b64 [param2+0], %rd6;
.param .b64 param3;
st.param.b64 [param3+0], %rd5;
// Call the externally declared function; no return parameter is received.
call.uni
_Z11curand_inityyyP17curandStateXORWOW,
(
param0,
param1,
param2,
param3
);
//{
}// Callseq End 0
ret;
}