It seems that `exit` cannot be used in the middle of a kernel.
The following kernel, which executes `exit` just before the label `$L1`, fails:
// PTX module header: ISA version 1.0, compiled for the compute_10 target.
.version 1.0
.target compute_10, map_f64_to_f32
// Kernel `adder`: each thread whose index i satisfies i < p_n performs
//   *(p_a + 4*i) += *(p_b + 4*i)
// on 32-bit values in global memory. Threads with i >= p_n terminate early
// through the `exit` placed in the middle of the body (before $L1).
.entry adder
{
// p_a, p_b: 32-bit values used below as global-memory base addresses.
// p_n:      signed upper bound on the thread index.
.param .u32 p_a;
.param .u32 p_b;
.param .s32 p_n;
.reg .u32 $r1,$r2,$r3,$r4,$r5,$r6,$r7,$r8,$r9,$r10,$r11,$r12,$r13,$r14,$r15,$r16,$r17;
.reg .pred $p1;
cvt.u32.u16 $r1,%ctaid.x;   // r1 = block index (x), widened from 16 to 32 bits
cvt.u32.u16 $r2,%tid.x;     // r2 = thread index within the block (x), widened
mov.s32 $r4,8;
shl.b32 $r3,$r1,$r4;        // r3 = ctaid.x << 8 (presumably 256 threads per block; confirm against launch config)
add.s32 $r5,$r3,$r2;        // r5 = i = (ctaid.x << 8) + tid.x
ld.param.s32 $r6,p_n;       // r6 = p_n
setp.lt.s32 $p1,$r5,$r6;    // p1 = (i < p_n), signed comparison
@$p1 bra $L1;               // in-range threads jump over the early exit
exit;                       // out-of-range threads terminate here, mid-kernel
$L1:
mov.s32 $r8,2;
shl.b32 $r7,$r5,$r8;        // r7 = i << 2 (byte offset of a 4-byte element)
ld.param.s32 $r10,p_b;      // r10 = p_b (read as .s32 although declared .u32; same 32-bit width)
add.s32 $r9,$r10,$r7;       // r9 = p_b + 4*i
mov.s32 $r12,2;
shl.b32 $r11,$r5,$r12;      // r11 = i << 2 again (same value as r7, recomputed)
ld.param.s32 $r14,p_a;      // r14 = p_a
add.s32 $r13,$r14,$r11;     // r13 = p_a + 4*i
ld.global.s32 $r16,[$r13];  // r16 = 32-bit value at p_a + 4*i
ld.global.s32 $r17,[$r9];   // r17 = 32-bit value at p_b + 4*i
add.s32 $r15,$r16,$r17;     // r15 = r16 + r17
st.global.s32 [$r13],$r15;  // store the sum back to p_a + 4*i
exit;                       // normal termination for in-range threads
}
This version, which instead branches to a single `exit` at the end of the kernel, works:
// PTX module header: ISA version 1.0, compiled for the compute_10 target.
.version 1.0
.target compute_10, map_f64_to_f32
// Kernel `adder`: each thread whose index i satisfies i < p_n performs
//   *(p_a + 4*i) += *(p_b + 4*i)
// on 32-bit values in global memory. Threads with i >= p_n branch to the
// single `exit` at $L_exit, so the kernel has exactly one exit point, at
// the end of the body.
.entry adder
{
// p_a, p_b: 32-bit values used below as global-memory base addresses.
// p_n:      signed upper bound on the thread index.
.param .u32 p_a;
.param .u32 p_b;
.param .s32 p_n;
.reg .u32 $r1,$r2,$r3,$r4,$r5,$r6,$r7,$r8,$r9,$r10,$r11,$r12,$r13,$r14,$r15,$r16,$r17;
.reg .pred $p1;
cvt.u32.u16 $r1,%ctaid.x;   // r1 = block index (x), widened from 16 to 32 bits
cvt.u32.u16 $r2,%tid.x;     // r2 = thread index within the block (x), widened
mov.s32 $r4,8;
shl.b32 $r3,$r1,$r4;        // r3 = ctaid.x << 8 (presumably 256 threads per block; confirm against launch config)
add.s32 $r5,$r3,$r2;        // r5 = i = (ctaid.x << 8) + tid.x
ld.param.s32 $r6,p_n;       // r6 = p_n
setp.lt.s32 $p1,$r5,$r6;    // p1 = (i < p_n), signed comparison
@$p1 bra $L1;               // in-range threads jump to the work section
bra.uni $L_exit;            // out-of-range threads skip to the shared exit at the end
$L1:
mov.s32 $r8,2;
shl.b32 $r7,$r5,$r8;        // r7 = i << 2 (byte offset of a 4-byte element)
ld.param.s32 $r10,p_b;      // r10 = p_b (read as .s32 although declared .u32; same 32-bit width)
add.s32 $r9,$r10,$r7;       // r9 = p_b + 4*i
mov.s32 $r12,2;
shl.b32 $r11,$r5,$r12;      // r11 = i << 2 again (same value as r7, recomputed)
ld.param.s32 $r14,p_a;      // r14 = p_a
add.s32 $r13,$r14,$r11;     // r13 = p_a + 4*i
ld.global.s32 $r16,[$r13];  // r16 = 32-bit value at p_a + 4*i
ld.global.s32 $r17,[$r9];   // r17 = 32-bit value at p_b + 4*i
add.s32 $r15,$r16,$r17;     // r15 = r16 + r17
st.global.s32 [$r13],$r15;  // store the sum back to p_a + 4*i
// Single exit point, reached by fall-through and by the bra.uni above.
$L_exit:exit;
}