Hi, the Optix community,
I encountered a bug when trying to use __int128 in Optix. A compile error is thrown when calling optixModuleCreateFromPTX.
struct LaunchParams {
uint32_t width;
uint32_t height;
uint32_t depth;
float3 *vertices;
uint3 *indices;
OptixTraversableHandle traversable;
__int128 a, b, c;
};
extern "C" __global__ void __anyhit__radiance() {
double f = -optixLaunchParams.a * optixLaunchParams.b / optixLaunchParams.c; // This makes optix failed to compile ptx code
printf("%lf\n", f);
}
.visible .entry __anyhit__radiance()
{
.local .align 8 .b8 __local_depot1[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<22>;
.reg .b32 %r<417>;
.reg .f64 %fd<2>;
.reg .b64 %rd<47>;
.loc 1 38 0
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
add.u64 %rd13, %SP, 0;
add.u64 %rd1, %SPL, 0;
.loc 1 54 18
ld.const.v2.u64 {%rd14, %rd15}, [optixLaunchParams+48];
mov.u64 %rd5, 0;
sub.cc.s64 %rd18, %rd5, %rd14;
subc.cc.s64 %rd19, %rd5, %rd15;
ld.const.v2.u64 {%rd20, %rd21}, [optixLaunchParams+64];
mul.lo.s64 %rd24, %rd18, %rd21;
mul.hi.u64 %rd25, %rd18, %rd20;
add.s64 %rd26, %rd25, %rd24;
mul.lo.s64 %rd27, %rd19, %rd20;
add.s64 %rd28, %rd26, %rd27;
mul.lo.s64 %rd29, %rd18, %rd20;
ld.const.v2.u64 {%rd30, %rd31}, [optixLaunchParams+80];
cvt.u32.u64 %r82, %rd29;
shr.u64 %rd34, %rd29, 32;
cvt.u32.u64 %r85, %rd34;
mov.u32 %r194, 0;
cvt.u32.u64 %r88, %rd28;
shr.u64 %rd35, %rd28, 32;
cvt.u32.u64 %r91, %rd35;
cvt.u32.u64 %r94, %rd30;
mov.b64 {%r195, %r97}, %rd30;
mov.b64 {%r100, %r103}, %rd31;
shr.u64 %rd2, %rd28, 63;
// begin inline asm
sub.cc.u32 %r80,%r194,%r82;
// end inline asm
// begin inline asm
subc.cc.u32 %r83,%r194,%r85;
// end inline asm
// begin inline asm
subc.cc.u32 %r86,%r194,%r88;
// end inline asm
// begin inline asm
subc.u32 %r89,%r194,%r91;
// end inline asm
setp.eq.s64 %p1, %rd2, 0;
selp.b32 %r125, %r82, %r80, %p1;
selp.b32 %r129, %r85, %r83, %p1;
selp.b32 %r133, %r88, %r86, %p1;
selp.b32 %r137, %r91, %r89, %p1;
shr.u64 %rd4, %rd31, 63;
// begin inline asm
sub.cc.u32 %r92,%r194,%r94;
// end inline asm
// begin inline asm
subc.cc.u32 %r95,%r194,%r97;
// end inline asm
// begin inline asm
subc.cc.u32 %r98,%r194,%r100;
// end inline asm
// begin inline asm
subc.u32 %r101,%r194,%r103;
// end inline asm
setp.eq.s64 %p2, %rd4, 0;
selp.b32 %r196, %r94, %r92, %p2;
selp.b32 %r197, %r97, %r95, %p2;
selp.b32 %r198, %r100, %r98, %p2;
selp.b32 %r199, %r103, %r101, %p2;
or.b32 %r200, %r199, %r198;
setp.eq.s32 %p3, %r200, 0;
selp.b32 %r201, %r197, %r199, %p3;
selp.b32 %r202, %r196, %r198, %p3;
selp.b32 %r203, 0, %r197, %p3;
selp.b32 %r204, 0, %r196, %p3;
selp.b32 %r205, 64, 0, %p3;
setp.eq.s32 %p4, %r201, 0;
selp.b32 %r106, %r202, %r201, %p4;
selp.b32 %r110, %r203, %r202, %p4;
selp.b32 %r114, %r204, %r203, %p4;
selp.b32 %r118, 0, %r204, %p4;
selp.b32 %r206, 32, 0, %p4;
or.b32 %r207, %r206, %r205;
clz.b32 %r208, %r106;
add.s32 %r139, %r207, %r208;
// begin inline asm
shf.l.wrap.b32 %r104,%r110,%r106,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r108,%r114,%r110,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r112,%r118,%r114,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r116,%r194,%r118,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r392,%r194,%r125,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r391,%r125,%r129,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r390,%r129,%r133,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r389,%r133,%r137,%r139;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r388,%r137,%r194,%r139;
// end inline asm
// begin inline asm
mov.b64 %rd6,{%r108,%r104};
// end inline asm
// begin inline asm
{
.reg .f32 ftmp;
.reg .u32 qtmp;
prmt.b32 ftmp,%r104,0x3F,0x4321;
rcp.approx.f32 ftmp,ftmp;
mov.b32 qtmp,ftmp;
mad.lo.u32 %r142,qtmp,512,0xFFFFFE00;
}
// end inline asm
mul.hi.u32 %r209, %r142, %r108;
cvt.u64.u32 %rd36, %r209;
// begin inline asm
mad.wide.u32 %rd7,%r142,%r104,%rd6;
// end inline asm
add.s64 %rd37, %rd7, %rd36;
neg.s64 %rd9, %rd37;
// begin inline asm
mov.b64 {%r146,%r147},%rd9;
// end inline asm
setp.gt.s32 %p5, %r147, -1;
selp.b32 %r174, %r142, 0, %p5;
shr.u32 %r210, %r147, 31;
xor.b32 %r177, %r210, 1;
mov.u32 %r151, -2147483648;
// begin inline asm
mad.hi.cc.u32 %r148,%r142,%r146,%r151;
// end inline asm
// begin inline asm
addc.u32 %r152,%r194,%r194;
// end inline asm
// begin inline asm
mad.lo.cc.u32 %r155,%r142,%r147,%r148;
// end inline asm
// begin inline asm
madc.hi.u32 %r159,%r142,%r147,%r152;
// end inline asm
// begin inline asm
add.cc.u32 %r163,%r155,%r146;
// end inline asm
// begin inline asm
addc.cc.u32 %r166,%r159,%r147;
// end inline asm
// begin inline asm
addc.u32 %r169,%r194,%r194;
// end inline asm
// begin inline asm
add.cc.u32 %r172,%r166,%r174;
// end inline asm
// begin inline asm
addc.u32 %r175,%r169,%r177;
// end inline asm
setp.eq.s32 %p6, %r175, 2;
selp.b32 %r178, -1, %r172, %p6;
mul.hi.u32 %r211, %r178, %r108;
cvt.u64.u32 %rd11, %r211;
// begin inline asm
mad.wide.u32 %rd10,%r178,%r104,%rd11;
// end inline asm
// begin inline asm
mov.b64 {%r180,%r181},%rd10;
// end inline asm
// begin inline asm
add.cc.u32 %r182,%r180,%r108;
// end inline asm
// begin inline asm
addc.cc.u32 %r185,%r181,%r104;
// end inline asm
// begin inline asm
addc.u32 %r188,%r194,%r194;
// end inline asm
setp.eq.s32 %p7, %r188, 0;
setp.ne.s32 %p8, %r178, -1;
and.pred %p9, %p8, %p7;
selp.u32 %r212, 1, 0, %p9;
add.s32 %r11, %r178, %r212;
mov.u32 %r386, %r194;
mov.u32 %r400, %r194;
mov.u32 %r410, %r194;
mov.u32 %r411, %r194;
$L__BB1_1:
.pragma "nounroll";
mov.u32 %r412, %r411;
mov.u32 %r411, %r410;
mov.u32 %r410, %r400;
// begin inline asm
sub.cc.u32 %r213,%r390,%r108;
// end inline asm
// begin inline asm
subc.cc.u32 %r216,%r389,%r104;
// end inline asm
mov.u32 %r221, -2;
// begin inline asm
subc.u32 %r219,%r388,%r221;
// end inline asm
// begin inline asm
mad.hi.u32 %r222,%r388,%r11,%r219;
// end inline asm
setp.lt.u32 %p10, %r222, %r388;
selp.b32 %r400, -1, %r222, %p10;
// begin inline asm
mad.lo.cc.u32 %r226,%r400,%r108,%r194;
// end inline asm
// begin inline asm
madc.hi.u32 %r230,%r400,%r108,%r194;
// end inline asm
// begin inline asm
mad.lo.cc.u32 %r234,%r400,%r104,%r230;
// end inline asm
// begin inline asm
madc.hi.u32 %r238,%r400,%r104,%r194;
// end inline asm
// begin inline asm
sub.cc.u32 %r401,%r390,%r226;
// end inline asm
// begin inline asm
subc.cc.u32 %r402,%r389,%r234;
// end inline asm
// begin inline asm
subc.cc.u32 %r403,%r388,%r238;
// end inline asm
// begin inline asm
addc.u32 %r399,%r194,%r194;
// end inline asm
setp.ne.s32 %p11, %r399, 0;
@%p11 bra $L__BB1_3;
// begin inline asm
add.cc.u32 %r401,%r401,%r108;
// end inline asm
// begin inline asm
addc.cc.u32 %r402,%r402,%r104;
// end inline asm
mov.u32 %r265, 0;
// begin inline asm
addc.cc.u32 %r403,%r403,%r265;
// end inline asm
// begin inline asm
addc.u32 %r399,%r265,%r265;
// end inline asm
add.s32 %r400, %r400, -1;
$L__BB1_3:
setp.ne.s32 %p12, %r399, 0;
@%p12 bra $L__BB1_5;
// begin inline asm
add.cc.u32 %r401,%r401,%r108;
// end inline asm
// begin inline asm
addc.cc.u32 %r402,%r402,%r104;
// end inline asm
mov.u32 %r274, 0;
// begin inline asm
addc.u32 %r403,%r403,%r274;
// end inline asm
add.s32 %r400, %r400, -1;
$L__BB1_5:
mov.u32 %r308, 0;
// begin inline asm
mad.lo.cc.u32 %r275,%r400,%r116,%r308;
// end inline asm
// begin inline asm
madc.hi.u32 %r279,%r400,%r116,%r308;
// end inline asm
// begin inline asm
mad.lo.cc.u32 %r283,%r400,%r112,%r279;
// end inline asm
// begin inline asm
madc.hi.u32 %r287,%r400,%r112,%r308;
// end inline asm
// begin inline asm
sub.cc.u32 %r404,%r392,%r275;
// end inline asm
// begin inline asm
subc.cc.u32 %r390,%r391,%r283;
// end inline asm
// begin inline asm
subc.cc.u32 %r389,%r401,%r287;
// end inline asm
// begin inline asm
subc.cc.u32 %r388,%r402,%r308;
// end inline asm
// begin inline asm
subc.cc.u32 %r303,%r403,%r308;
// end inline asm
// begin inline asm
subc.u32 %r306,%r308,%r308;
// end inline asm
setp.ne.s32 %p13, %r306, -1;
@%p13 bra $L__BB1_7;
add.s32 %r400, %r400, -1;
// begin inline asm
add.cc.u32 %r404,%r404,%r116;
// end inline asm
// begin inline asm
addc.cc.u32 %r390,%r390,%r112;
// end inline asm
// begin inline asm
addc.cc.u32 %r389,%r389,%r108;
// end inline asm
// begin inline asm
addc.u32 %r388,%r388,%r104;
// end inline asm
$L__BB1_7:
add.s32 %r386, %r386, 32;
setp.le.u32 %p14, %r386, %r139;
mov.u32 %r391, %r404;
mov.u32 %r392, %r308;
@%p14 bra $L__BB1_1;
xor.b64 %rd38, %rd2, %rd4;
setp.eq.s64 %p15, %rd38, 0;
@%p15 bra $L__BB1_10;
mov.u32 %r332, 0;
// begin inline asm
sub.cc.u32 %r400,%r332,%r400;
// end inline asm
// begin inline asm
subc.cc.u32 %r410,%r332,%r410;
// end inline asm
// begin inline asm
subc.cc.u32 %r411,%r332,%r411;
// end inline asm
// begin inline asm
subc.u32 %r412,%r332,%r412;
// end inline asm
$L__BB1_10:
setp.eq.s32 %p16, %r139, 128;
selp.b32 %r416, -1, %r412, %p16;
selp.b32 %r415, -1, %r411, %p16;
selp.b32 %r414, -1, %r410, %p16;
selp.b32 %r413, -1, %r400, %p16;
and.b32 %r71, %r416, -2147483648;
setp.eq.s32 %p17, %r71, 0;
@%p17 bra $L__BB1_12;
mov.u32 %r344, 0;
// begin inline asm
sub.cc.u32 %r413,%r344,%r413;
// end inline asm
// begin inline asm
subc.cc.u32 %r414,%r344,%r414;
// end inline asm
// begin inline asm
subc.cc.u32 %r415,%r344,%r415;
// end inline asm
// begin inline asm
subc.u32 %r416,%r344,%r416;
// end inline asm
$L__BB1_12:
or.b32 %r372, %r416, %r415;
setp.eq.s32 %p18, %r372, 0;
mov.u32 %r359, 0;
selp.b32 %r373, %r414, %r416, %p18;
selp.b32 %r374, %r413, %r415, %p18;
selp.b32 %r375, 0, %r414, %p18;
selp.b32 %r376, 0, %r413, %p18;
selp.b32 %r377, 64, 0, %p18;
setp.eq.s32 %p19, %r373, 0;
selp.b32 %r348, %r374, %r373, %p19;
selp.b32 %r352, %r375, %r374, %p19;
selp.b32 %r356, %r376, %r375, %p19;
selp.b32 %r360, 0, %r376, %p19;
selp.b32 %r378, 32, 0, %p19;
or.b32 %r379, %r378, %r377;
clz.b32 %r380, %r348;
add.s32 %r361, %r379, %r380;
// begin inline asm
shf.l.wrap.b32 %r346,%r352,%r348,%r361;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r350,%r356,%r352,%r361;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r354,%r360,%r356,%r361;
// end inline asm
// begin inline asm
shf.l.wrap.b32 %r358,%r359,%r360,%r361;
// end inline asm
shl.b32 %r381, %r361, 20;
or.b32 %r382, %r358, %r354;
setp.ne.s32 %p20, %r382, 0;
selp.u32 %r383, 1, 0, %p20;
or.b32 %r362, %r350, %r383;
// begin inline asm
mov.b64 %rd39,{%r362,%r346};
// end inline asm
cvt.rn.f64.u64 %fd1, %rd39;
mov.b64 %rd40, %fd1;
// begin inline asm
mov.b64 {%r364,%r365},%rd40;
// end inline asm
add.s32 %r384, %r365, 67108864;
sub.s32 %r367, %r384, %r381;
// begin inline asm
mov.b64 %rd41,{%r364,%r367};
// end inline asm
setp.eq.s32 %p21, %r361, 128;
selp.b64 %rd42, 0, %rd41, %p21;
// begin inline asm
mov.b64 {%r368,%r369},%rd42;
// end inline asm
or.b32 %r371, %r369, %r71;
// begin inline asm
mov.b64 %rd43,{%r368,%r371};
// end inline asm
.loc 1 55 9
st.local.u64 [%rd1], %rd43;
mov.u64 %rd44, $str;
cvta.global.u64 %rd45, %rd44;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd45;
.param .b64 param1;
st.param.b64 [param1+0], %rd13;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r385, [retval0+0];
} // callseq 0
.loc 1 56 5
ret;
}
I make a minimal code that produces this bug: GitHub - pwrliang/optix7course at optixbug
Environment:
CUDA 11.8
Optix 7.6
GCC 9.4.0
Driver Version: 520.61.05
OS: Ubuntu 20.04.5