//
// Generated by NVIDIA PTX Backend for LLVM
//
// Module layout:
//   vec_add   - kernel: out[i] = a[i] + b[i] for a linearised 2-D global id i < count
//   sum       - kernel: reduction of 64 u32 values through shared memory
//   loadBlock - device function: samples 4 texels and stores them as bytes in shared memory
//   test      - kernel: calls loadBlock for two images and copies two bytes to global memory
//
.version 1.5
.target sm_13, texmode_independent

/* Global Variables*/
.shared .align 4 .b8 shr_1_tmp[256];   /* scratch for `sum`: 256 bytes = 64 u32 slots */
.global .samplerref %sam_sampler_norm_0 = { addr_mode_0 = clamp_to_edge, addr_mode_1 = clamp_to_edge, addr_mode_2 = clamp_to_edge, filter_mode = nearest };
.shared .align 1 .b8 shr_2_buf1[256];  /* byte buffer filled by the first loadBlock call in `test` */
.shared .align 1 .b8 shr_3_buf2[256];  /* byte buffer filled by the second loadBlock call in `test` */

/* Global Launch Offsets */
.const[0] .s32 %_global_block_offset[3];
.const[0] .s32 %_global_launch_offset[3];
.const[0] .s32 %_global_num_groups[3];
.const[0] .s32 %_global_size[3];
.const[0] .u32 %_work_dim;

/* Temporary variables for v2load/v4load/read */
.local .b8 vector_load_8[4];
.local .b16 vector_load_16[4];
.local .b32 vector_load_32[4];
.local .b64 vector_load_64[4];

/* Function Prototypes */
.func loadBlock (.reg .b32 ptxbe_rdo_image, .reg .u32 ptxbe_def_x_addr1, .reg .u32 ptxbe_def_y_addr2, .reg .u32 ptxbe_def_tid_addr3, .reg .b32 ptxbe_def_target)

.global .texref test_param_0;
.const[0] .u32 test_param_0_imgInfo[5];
.global .texref test_param_1;
.const[0] .u32 test_param_1_imgInfo[5];
.const[0] .b32 %_dummy_const; /* needed to avoid an assert in driver */

/* Function Bodies */

/*
 * vec_add(out, a, b, count)
 *   vec_add_param_0 : 32-bit global address of the output u32 array
 *   vec_add_param_1 : 32-bit global address of input array a
 *   vec_add_param_2 : 32-bit global address of input array b
 *   vec_add_param_3 : element count (compared unsigned)
 *
 * idx = global_id(0) + global_id(1) * %_global_size[0];
 * if (idx < count) out[idx] = a[idx] + b[idx];
 */
.entry vec_add (
    .param .b32 vec_add_param_0,
    .param .b32 vec_add_param_1,
    .param .b32 vec_add_param_2,
    .param .u32 vec_add_param_3)
{
    .reg .b32 ptxbe_def_out;
    .reg .b32 ptxbe_def_a;
    .reg .b32 ptxbe_def_b;
    .reg .u32 ptxbe_def_count_addr4;
    /* Local variables */
    .reg .s32 ltmp_0_1;
    .reg .s32 ltmp_1_1;
    .reg .s32 ltmp_2_1;
    .reg .s32 ltmp_3_1;
    .reg .s32 ltmp_4_1;
    .reg .pred ltmp_5_2;
    .reg .b32 ltmp_6_3;
    .reg .b32 ltmp_7_3;
    .reg .s32 ltmp_8_1;
    .reg .b32 ltmp_9_3;
    .reg .s32 ltmp_10_1;
    .reg .s32 ltmp_11_1;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr;   /* Temporary address */
    .reg .b32 %tmp_addr2;  /* Temporary address */
    .reg .b32 %tmp_val;    /* Temporary value */

    ld.param.b32 ptxbe_def_out, [vec_add_param_0];
    ld.param.b32 ptxbe_def_a, [vec_add_param_1];
    ld.param.b32 ptxbe_def_b, [vec_add_param_2];
    ld.param.u32 ptxbe_def_count_addr4, [vec_add_param_3];

    /* ptx_get_global_id */
    /* Dimension 0: the three predicates compare the constant dimension index
       against 0/1/2, so only the .x reads below are executed. */
    .reg .u32 %intr_gid_0,%intr_gid_1,%intr_gid_2, %intr_gid_3,%intr_gid_4;
    .reg .pred %intr_gid_5,%intr_gid_6,%intr_gid_7;
    setp.eq.u32 %intr_gid_5,0,0;
    setp.eq.u32 %intr_gid_6,0,1;
    setp.eq.u32 %intr_gid_7,0,2;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_0,%tid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_0,%tid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_0,%tid.z;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_1,%ntid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_1,%ntid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_1,%ntid.z;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_2,%ctaid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_2,%ctaid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_2,%ctaid.z;
    mad.lo.u32 ltmp_0_1,%intr_gid_1,%intr_gid_2,%intr_gid_0;   /* ntid * ctaid + tid */
    mov.s32 %intr_gid_3,0;
    mul.lo.s32 %intr_gid_3,%intr_gid_3,4;                      /* byte offset of entry 0 */
    mov.s32 %intr_gid_4,%_global_launch_offset;
    add.s32 %intr_gid_4,%intr_gid_3,%intr_gid_4;
    ld.const.s32 %intr_gid_3,[%intr_gid_4];
    add.s32 ltmp_0_1,ltmp_0_1,%intr_gid_3;                     /* + %_global_launch_offset[0] */
    /* end ptx_get_global_id */

    /* ptx_get_global_id */
    /* Dimension 1: same sequence, only the .y reads are executed. */
    .reg .u32 %intr_gid_8,%intr_gid_9,%intr_gid_10, %intr_gid_11,%intr_gid_12;
    .reg .pred %intr_gid_13,%intr_gid_14,%intr_gid_15;
    setp.eq.u32 %intr_gid_13,1,0;
    setp.eq.u32 %intr_gid_14,1,1;
    setp.eq.u32 %intr_gid_15,1,2;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_8,%tid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_8,%tid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_8,%tid.z;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_9,%ntid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_9,%ntid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_9,%ntid.z;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_10,%ctaid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_10,%ctaid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_10,%ctaid.z;
    mad.lo.u32 ltmp_1_1,%intr_gid_9,%intr_gid_10,%intr_gid_8;
    mov.s32 %intr_gid_11,1;
    mul.lo.s32 %intr_gid_11,%intr_gid_11,4;                    /* byte offset of entry 1 */
    mov.s32 %intr_gid_12,%_global_launch_offset;
    add.s32 %intr_gid_12,%intr_gid_11,%intr_gid_12;
    ld.const.s32 %intr_gid_11,[%intr_gid_12];
    add.s32 ltmp_1_1,ltmp_1_1,%intr_gid_11;                    /* + %_global_launch_offset[1] */
    /* end ptx_get_global_id */

    /* ptx_get_global_size_i */
    .reg .s32 %intr_gdim_16,%intr_gdim_17;
    mul.lo.s32 %intr_gdim_16,0,4;
    mov.b32 %intr_gdim_17,%_global_size;
    add.s32 %intr_gdim_17,%intr_gdim_17,%intr_gdim_16;
    ld.const.s32 ltmp_2_1,[%intr_gdim_17];                     /* %_global_size[0] */
    /* end ptx_get_global_size_i */

    mul.lo.s32 ltmp_3_1,ltmp_1_1,ltmp_2_1;
    add.s32 ltmp_4_1,ltmp_0_1,ltmp_3_1;                        /* idx = gid0 + gid1 * size0 */
    setp.lt.u32 ltmp_5_2,ltmp_4_1,ptxbe_def_count_addr4;       /* unsigned bounds check */
    @ltmp_5_2 bra ltmp_12_4;
    bra ltmp_13_4;

ltmp_12_4:
    /* In range: out[idx] = a[idx] + b[idx], 4-byte elements. */
    mov.b32 ltmp_6_3, ptxbe_def_out;
    mad.lo.s32 ltmp_6_3, ltmp_4_1, 4, ltmp_6_3;
    mov.b32 ltmp_7_3, ptxbe_def_a;
    mad.lo.s32 ltmp_7_3, ltmp_4_1, 4, ltmp_7_3;
    ld.global.u32 ltmp_8_1, [ltmp_7_3+0];
    mov.b32 ltmp_9_3, ptxbe_def_b;
    mad.lo.s32 ltmp_9_3, ltmp_4_1, 4, ltmp_9_3;
    ld.global.u32 ltmp_10_1, [ltmp_9_3+0];
    add.s32 ltmp_11_1,ltmp_8_1,ltmp_10_1;
    st.global.u32 [ltmp_6_3+0],ltmp_11_1;
    ret;

ltmp_13_4:
    /* Out of range: nothing to do. */
    ret;
}

/*
 * sum(out, in)
 *   sum_param_0 : 32-bit global address receiving the single u32 result
 *   sum_param_1 : 32-bit global address of the u32 input array
 *
 * Every thread copies in[tid] to shr_1_tmp[tid]; threads with tid < 32 then
 * fold slot tid+32, tid+16, ... tid+1 into slot tid, and finally every thread
 * stores shr_1_tmp[0] to out[0].
 */
.entry sum (
    .param .b32 sum_param_0,
    .param .b32 sum_param_1)
{
    .reg .b32 ptxbe_def_out;
    .reg .b32 ptxbe_def_in;
    /* Local variables */
    .reg .s32 ltmp_14_1;
    .reg .b32 ltmp_15_5;
    .reg .b32 ltmp_16_3;
    .reg .s32 ltmp_17_1;
    .reg .pred ltmp_18_2;
    .reg .b32 ltmp_19_5;
    .reg .s32 ltmp_20_1;
    .reg .s32 ltmp_21_1;
    .reg .b32 ltmp_22_5;
    .reg .s32 ltmp_23_1;
    .reg .s32 ltmp_24_1;
    .reg .s32 ltmp_25_1;
    .reg .s32 ltmp_26_1;
    .reg .b32 ltmp_27_5;
    .reg .s32 ltmp_28_1;
    .reg .s32 ltmp_29_1;
    .reg .s32 ltmp_30_1;
    .reg .s32 ltmp_31_1;
    .reg .b32 ltmp_32_5;
    .reg .s32 ltmp_33_1;
    .reg .s32 ltmp_34_1;
    .reg .s32 ltmp_35_1;
    .reg .s32 ltmp_36_1;
    .reg .b32 ltmp_37_5;
    .reg .s32 ltmp_38_1;
    .reg .s32 ltmp_39_1;
    .reg .s32 ltmp_40_1;
    .reg .s32 ltmp_41_1;
    .reg .b32 ltmp_42_5;
    .reg .s32 ltmp_43_1;
    .reg .s32 ltmp_44_1;
    .reg .s32 ltmp_45_1;
    .reg .s32 ltmp_46_1;
    .reg .b32 ltmp_47_5;
    .reg .s32 ltmp_48_1;
    .reg .s32 ltmp_49_1;
    .reg .s32 ltmp_50_1;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr;   /* Temporary address */
    .reg .b32 %tmp_addr2;  /* Temporary address */
    .reg .b32 %tmp_val;    /* Temporary value */

    ld.param.b32 ptxbe_def_out, [sum_param_0];
    ld.param.b32 ptxbe_def_in, [sum_param_1];

    /* ptx_get_local_id */
    .reg .pred %intr_lid_18,%intr_lid_19,%intr_lid_20;
    setp.eq.u32 %intr_lid_18,0,0;
    setp.eq.u32 %intr_lid_19,0,1;
    setp.eq.u32 %intr_lid_20,0,2;
    @%intr_lid_18 cvt.u32.u16 ltmp_14_1,%tid.x;
    @%intr_lid_19 cvt.u32.u16 ltmp_14_1,%tid.y;
    @%intr_lid_20 cvt.u32.u16 ltmp_14_1,%tid.z;
    /* end ptx_get_local_id */

    /* shr_1_tmp[tid] = in[tid] */
    mov.b32 ltmp_15_5, shr_1_tmp;
    mad.lo.s32 ltmp_15_5, ltmp_14_1, 4, ltmp_15_5;
    mov.b32 ltmp_16_3, ptxbe_def_in;
    mad.lo.s32 ltmp_16_3, ltmp_14_1, 4, ltmp_16_3;
    ld.global.u32 ltmp_17_1, [ltmp_16_3+0];
    st.shared.u32 [ltmp_15_5+0],ltmp_17_1;

    /* The first reduction step reads slot tid+32, which was stored by a
       different thread just above. Synchronise here, where every thread
       still follows the same path, so those stores are complete first. */
    bar.sync 0;

    setp.lt.s32 ltmp_18_2,ltmp_14_1,32;
    @ltmp_18_2 bra ltmp_51_4;
    bra ltmp_52_4;

ltmp_51_4:
    /* NOTE(review): the five bar.sync instructions in this branch are reached
       only by threads with tid < 32, while the remaining threads reach only
       the one at ltmp_52_4. Confirm that this is acceptable for the launch
       configuration this kernel is used with. */

    /* tmp[tid] += tmp[tid + 32] */
    mov.b32 ltmp_19_5, shr_1_tmp;
    mad.lo.s32 ltmp_19_5, ltmp_14_1, 4, ltmp_19_5;
    ld.shared.u32 ltmp_20_1, [ltmp_19_5+0];
    add.s32 ltmp_21_1,ltmp_14_1,32;
    mov.b32 ltmp_22_5, shr_1_tmp;
    mad.lo.s32 ltmp_22_5, ltmp_21_1, 4, ltmp_22_5;
    ld.shared.u32 ltmp_23_1, [ltmp_22_5+0];
    add.s32 ltmp_24_1,ltmp_20_1,ltmp_23_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_24_1;
    bar.sync 0;

    /* tmp[tid] += tmp[tid + 16] */
    ld.shared.u32 ltmp_25_1, [ltmp_19_5+0];
    add.s32 ltmp_26_1,ltmp_14_1,16;
    mov.b32 ltmp_27_5, shr_1_tmp;
    mad.lo.s32 ltmp_27_5, ltmp_26_1, 4, ltmp_27_5;
    ld.shared.u32 ltmp_28_1, [ltmp_27_5+0];
    add.s32 ltmp_29_1,ltmp_25_1,ltmp_28_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_29_1;
    bar.sync 0;

    /* tmp[tid] += tmp[tid + 8] */
    ld.shared.u32 ltmp_30_1, [ltmp_19_5+0];
    add.s32 ltmp_31_1,ltmp_14_1,8;
    mov.b32 ltmp_32_5, shr_1_tmp;
    mad.lo.s32 ltmp_32_5, ltmp_31_1, 4, ltmp_32_5;
    ld.shared.u32 ltmp_33_1, [ltmp_32_5+0];
    add.s32 ltmp_34_1,ltmp_30_1,ltmp_33_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_34_1;
    bar.sync 0;

    /* tmp[tid] += tmp[tid + 4] */
    ld.shared.u32 ltmp_35_1, [ltmp_19_5+0];
    add.s32 ltmp_36_1,ltmp_14_1,4;
    mov.b32 ltmp_37_5, shr_1_tmp;
    mad.lo.s32 ltmp_37_5, ltmp_36_1, 4, ltmp_37_5;
    ld.shared.u32 ltmp_38_1, [ltmp_37_5+0];
    add.s32 ltmp_39_1,ltmp_35_1,ltmp_38_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_39_1;
    bar.sync 0;

    /* tmp[tid] += tmp[tid + 2] */
    ld.shared.u32 ltmp_40_1, [ltmp_19_5+0];
    add.s32 ltmp_41_1,ltmp_14_1,2;
    mov.b32 ltmp_42_5, shr_1_tmp;
    mad.lo.s32 ltmp_42_5, ltmp_41_1, 4, ltmp_42_5;
    ld.shared.u32 ltmp_43_1, [ltmp_42_5+0];
    add.s32 ltmp_44_1,ltmp_40_1,ltmp_43_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_44_1;
    bar.sync 0;

    /* tmp[tid] += tmp[tid + 1] */
    ld.shared.u32 ltmp_45_1, [ltmp_19_5+0];
    add.s32 ltmp_46_1,ltmp_14_1,1;
    mov.b32 ltmp_47_5, shr_1_tmp;
    mad.lo.s32 ltmp_47_5, ltmp_46_1, 4, ltmp_47_5;
    ld.shared.u32 ltmp_48_1, [ltmp_47_5+0];
    add.s32 ltmp_49_1,ltmp_45_1,ltmp_48_1;
    st.shared.u32 [ltmp_19_5+0],ltmp_49_1;
    bra ltmp_52_4;

ltmp_52_4:
    /* out[0] = tmp[0] */
    bar.sync 0;
    mov.s32 %tmp_addr2, shr_1_tmp;
    ld.shared.u32 ltmp_50_1, [ %tmp_addr2 ];
    st.global.u32 [ptxbe_def_out+0],ltmp_50_1;
    ret;
}

/*
 * loadBlock(image, x, y, tid, target)
 *   ptxbe_rdo_image     : image handle used as the texture operand of tex.2d
 *   ptxbe_def_x_addr1   : not referenced in the body
 *   ptxbe_def_y_addr2   : not referenced in the body
 *   ptxbe_def_tid_addr3 : thread index selecting the texels and the byte slots
 *   ptxbe_def_target    : shared-memory base address of the destination bytes
 *
 * For k = 0..3 the texel at column (((tid << 2) & 12) | k), row (tid / 4) is
 * sampled with %sam_sampler_norm_0; its first component c is converted to a
 * byte as trunc(floor(c * 255.0 + 0.5)) and stored at target[(tid << 2) | k].
 */
.func loadBlock (.reg .b32 ptxbe_rdo_image, .reg .u32 ptxbe_def_x_addr1, .reg .u32 ptxbe_def_y_addr2, .reg .u32 ptxbe_def_tid_addr3, .reg .b32 ptxbe_def_target)
{
    /* Local variables */
    /* The four address-exposed locals below are declared but never referenced. */
    .local .s32 ptxbe_sam_sampler_addr_i_i23; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i13; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i3;  /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i;   /* Address-exposed local */
    .reg .s32 ltmp_54_1;
    .reg .s32 ltmp_55_1;
    .reg .s32 ltmp_56_1;
    .reg .b32 ltmp_57_6;
    .reg .s32 ltmp_58_1;
    .reg .u32 ltmp_59_1;
    .reg .s32 ltmp_60_1;
    .reg .f32 %vec_ltmp_61_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_61_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_61_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_61_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp;
    .reg .f32 ltmp_62_8;
    .reg .f32 ltmp_63_8;
    .reg .f32 ltmp_64_8;
    .reg .u8 ltmp_65_9;
    .reg .s32 ltmp_66_1;
    .reg .s32 ltmp_67_1;
    .reg .b32 ltmp_68_6;
    .reg .s32 ltmp_69_1;
    .reg .s32 ltmp_70_1;
    .reg .f32 %vec_ltmp_71_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_71_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_71_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_71_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp39;
    .reg .f32 ltmp_72_8;
    .reg .f32 ltmp_73_8;
    .reg .f32 ltmp_74_8;
    .reg .u8 ltmp_75_9;
    .reg .s32 ltmp_76_1;
    .reg .s32 ltmp_77_1;
    .reg .b32 ltmp_78_6;
    .reg .s32 ltmp_79_1;
    .reg .s32 ltmp_80_1;
    .reg .f32 %vec_ltmp_81_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_81_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_81_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_81_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp36;
    .reg .f32 ltmp_82_8;
    .reg .f32 ltmp_83_8;
    .reg .f32 ltmp_84_8;
    .reg .u8 ltmp_85_9;
    .reg .s32 ltmp_86_1;
    .reg .s32 ltmp_87_1;
    .reg .b32 ltmp_88_6;
    .reg .s32 ltmp_89_1;
    .reg .s32 ltmp_90_1;
    .reg .f32 %vec_ltmp_91_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_91_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_91_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_91_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp33;
    .reg .f32 ltmp_92_8;
    .reg .f32 ltmp_93_8;
    .reg .f32 ltmp_94_8;
    .reg .u8 ltmp_95_9;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr;   /* Temporary address */
    .reg .b32 %tmp_addr2;  /* Temporary address */
    .reg .b32 %tmp_val;    /* Temporary value */

    div.s32 ltmp_54_1,ptxbe_def_tid_addr3,4;    /* row    = tid / 4 */
    shl.b32 ltmp_55_1,ptxbe_def_tid_addr3,2;    /* offset = tid * 4 (byte slot of texel 0) */
    and.b32 ltmp_56_1,ltmp_55_1,12;             /* column = offset & 12 */

    /* ---- texel 0: column, row -> target[offset] ---- */
    mov.b32 ltmp_57_6, ptxbe_def_target;
    mad.lo.s32 ltmp_57_6, ltmp_55_1, 1, ltmp_57_6;
    mov.s32 ltmp_59_1,ptxbe_rdo_image;          /* image handle reused by all four fetches */
    .reg .f32 %intr_w_f_26, %intr_h_f_25;
    .reg .s32 %intr_cord_22, %intr_cord_21;
    mov.s32 %intr_cord_22, ltmp_56_1;
    mov.s32 %intr_cord_21, ltmp_54_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_26, %intr_cord_22;
    cvt.rz.ftz.f32.s32 %intr_h_f_25, %intr_cord_21;
    tex.2d.v4.f32.f32 { %vec_ltmp_61_7_0, %vec_ltmp_61_7_1, %vec_ltmp_61_7_2, %vec_ltmp_61_7_3 }, [ltmp_59_1, %sam_sampler_norm_0, {%intr_w_f_26, %intr_h_f_25}];
    mov.f32 ptxbe_tmp,%vec_ltmp_61_7_0;
    mul.rn.ftz.f32 ltmp_62_8,ptxbe_tmp,0d406FE00000000000;   /* * 255.0 */
    add.rn.ftz.f32 ltmp_63_8,ltmp_62_8,0d3FE0000000000000;   /* + 0.5 */
    cvt.rmi.f32.f32 ltmp_64_8,ltmp_63_8;                     /* floor */
    cvt.rzi.u8.f32 ltmp_65_9,ltmp_64_8;                      /* rounding modifier precedes the types, as in every other cvt here */
    st.shared.u8 [ltmp_57_6+0],ltmp_65_9;

    /* ---- texel 1: column | 1, row -> target[offset | 1] ---- */
    or.b32 ltmp_66_1,ltmp_56_1,1;
    or.b32 ltmp_67_1,ltmp_55_1,1;
    mov.b32 ltmp_68_6, ptxbe_def_target;
    mad.lo.s32 ltmp_68_6, ltmp_67_1, 1, ltmp_68_6;
    .reg .f32 %intr_w_f_34, %intr_h_f_33;
    .reg .s32 %intr_cord_30, %intr_cord_29;
    mov.s32 %intr_cord_30, ltmp_66_1;
    mov.s32 %intr_cord_29, ltmp_54_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_34, %intr_cord_30;
    cvt.rz.ftz.f32.s32 %intr_h_f_33, %intr_cord_29;
    tex.2d.v4.f32.f32 { %vec_ltmp_71_7_0, %vec_ltmp_71_7_1, %vec_ltmp_71_7_2, %vec_ltmp_71_7_3 }, [ltmp_59_1, %sam_sampler_norm_0, {%intr_w_f_34, %intr_h_f_33}];
    mov.f32 ptxbe_tmp39,%vec_ltmp_71_7_0;
    mul.rn.ftz.f32 ltmp_72_8,ptxbe_tmp39,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_73_8,ltmp_72_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_74_8,ltmp_73_8;
    cvt.rzi.u8.f32 ltmp_75_9,ltmp_74_8;
    st.shared.u8 [ltmp_68_6+0],ltmp_75_9;

    /* ---- texel 2: column | 2, row -> target[offset | 2] ---- */
    or.b32 ltmp_76_1,ltmp_56_1,2;
    or.b32 ltmp_77_1,ltmp_55_1,2;
    mov.b32 ltmp_78_6, ptxbe_def_target;
    mad.lo.s32 ltmp_78_6, ltmp_77_1, 1, ltmp_78_6;
    .reg .f32 %intr_w_f_42, %intr_h_f_41;
    .reg .s32 %intr_cord_38, %intr_cord_37;
    mov.s32 %intr_cord_38, ltmp_76_1;
    mov.s32 %intr_cord_37, ltmp_54_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_42, %intr_cord_38;
    cvt.rz.ftz.f32.s32 %intr_h_f_41, %intr_cord_37;
    tex.2d.v4.f32.f32 { %vec_ltmp_81_7_0, %vec_ltmp_81_7_1, %vec_ltmp_81_7_2, %vec_ltmp_81_7_3 }, [ltmp_59_1, %sam_sampler_norm_0, {%intr_w_f_42, %intr_h_f_41}];
    mov.f32 ptxbe_tmp36,%vec_ltmp_81_7_0;
    mul.rn.ftz.f32 ltmp_82_8,ptxbe_tmp36,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_83_8,ltmp_82_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_84_8,ltmp_83_8;
    cvt.rzi.u8.f32 ltmp_85_9,ltmp_84_8;
    st.shared.u8 [ltmp_78_6+0],ltmp_85_9;

    /* ---- texel 3: column | 3, row -> target[offset | 3] ---- */
    or.b32 ltmp_86_1,ltmp_56_1,3;
    or.b32 ltmp_87_1,ltmp_55_1,3;
    mov.b32 ltmp_88_6, ptxbe_def_target;
    mad.lo.s32 ltmp_88_6, ltmp_87_1, 1, ltmp_88_6;
    .reg .f32 %intr_w_f_50, %intr_h_f_49;
    .reg .s32 %intr_cord_46, %intr_cord_45;
    mov.s32 %intr_cord_46, ltmp_86_1;
    mov.s32 %intr_cord_45, ltmp_54_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_50, %intr_cord_46;
    cvt.rz.ftz.f32.s32 %intr_h_f_49, %intr_cord_45;
    tex.2d.v4.f32.f32 { %vec_ltmp_91_7_0, %vec_ltmp_91_7_1, %vec_ltmp_91_7_2, %vec_ltmp_91_7_3 }, [ltmp_59_1, %sam_sampler_norm_0, {%intr_w_f_50, %intr_h_f_49}];
    mov.f32 ptxbe_tmp33,%vec_ltmp_91_7_0;
    mul.rn.ftz.f32 ltmp_92_8,ptxbe_tmp33,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_93_8,ltmp_92_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_94_8,ltmp_93_8;
    cvt.rzi.u8.f32 ltmp_95_9,ltmp_94_8;
    st.shared.u8 [ltmp_88_6+0],ltmp_95_9;
    ret;
}

/*
 * test(out)
 *   test_param_2 : 32-bit global address of the output byte array
 *
 * Calls loadBlock once with shr_2_buf1 and once with shr_3_buf2 as the
 * destination, then copies shr_2_buf1[0] and shr_2_buf1[1] to out[0] and
 * out[1].
 */
.entry test (
    .param .b32 test_param_2)
{
    .reg .b32 ptxbe_def_out;
    /* Local variables */
    .reg .s32 ltmp_96_1;
    .reg .s8 ltmp_97_9;
    .reg .b32 ltmp_98_10;
    .reg .s8 ltmp_99_9;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr;   /* Temporary address */
    .reg .b32 %tmp_addr2;  /* Temporary address */
    .reg .b32 %tmp_val;    /* Temporary value */

    ld.param.b32 ptxbe_def_out, [test_param_2];

    /* ptx_get_local_id */
    .reg .pred %intr_lid_53,%intr_lid_54,%intr_lid_55;
    setp.eq.u32 %intr_lid_53,0,0;
    setp.eq.u32 %intr_lid_54,0,1;
    setp.eq.u32 %intr_lid_55,0,2;
    @%intr_lid_53 cvt.u32.u16 ltmp_96_1,%tid.x;
    @%intr_lid_54 cvt.u32.u16 ltmp_96_1,%tid.y;
    @%intr_lid_55 cvt.u32.u16 ltmp_96_1,%tid.z;
    /* end ptx_get_local_id */

    /* NOTE(review): ptxbe_rdo_im1 and ptxbe_rdo_im2 are not declared anywhere
       in this file as seen here; they presumably stand for the images bound
       to test_param_0 / test_param_1 - confirm against the generator. */

    /* loadBlock(im1, 0, 0, tid, shr_2_buf1) */
    .reg .u32 %tmp_0;
    mov.u32 %tmp_0, 0;
    .reg .u32 %tmp_1;
    mov.u32 %tmp_1, 0;
    mov.s32 %tmp_addr2, shr_2_buf1;
    /* Pass the buffer address that was just materialised in %tmp_addr2;
       %tmp_val is never written in this kernel. */
    call loadBlock, (ptxbe_rdo_im1, %tmp_0, %tmp_1, ltmp_96_1, %tmp_addr2);

    /* loadBlock(im2, 0, 0, tid, shr_3_buf2) */
    .reg .u32 %tmp_2;
    mov.u32 %tmp_2, 0;
    .reg .u32 %tmp_3;
    mov.u32 %tmp_3, 0;
    mov.s32 %tmp_addr2, shr_3_buf2;
    call loadBlock, (ptxbe_rdo_im2, %tmp_2, %tmp_3, ltmp_96_1, %tmp_addr2);

    /* Bytes 0 and 1 of shr_2_buf1 are written by the thread whose tid is 0
       but read below by every thread, so wait for all stores to finish. */
    bar.sync 0;

    /* out[0] = shr_2_buf1[0] */
    mov.s32 %tmp_addr2, shr_2_buf1;
    ld.shared.u8 ltmp_97_9, [ %tmp_addr2 ];
    st.global.u8 [ptxbe_def_out+0],ltmp_97_9;

    /* out[1] = shr_2_buf1[1] */
    mov.b32 ltmp_98_10, ptxbe_def_out;
    add.s32 ltmp_98_10, ltmp_98_10, 1;
    mov.s32 %tmp_addr2, shr_2_buf1;
    add.s32 %tmp_addr2, %tmp_addr2, 1;
    ld.shared.u8 ltmp_99_9, [ %tmp_addr2 ];
    st.global.u8 [ltmp_98_10+0],ltmp_99_9;
    ret;
}