//
// Generated by NVIDIA PTX Backend for LLVM
//

.version 1.5
.target sm_13, texmode_independent

/* Global Variables*/

.shared .align 4 .b8 shr_1_tmp[256];    /* scratch for sum: 64 x 32-bit words */
.global .samplerref %sam_sampler_norm_0 = { addr_mode_0 = clamp_to_edge, addr_mode_1 = clamp_to_edge, addr_mode_2 = clamp_to_edge, filter_mode = nearest };
.shared .align 1 .b8 shr_2_buf1[256];   /* byte buffer written and read by test */
.shared .align 1 .b8 shr_3_buf2[256];   /* byte buffer read (never written) by test */

/* Global Launch Offsets */
.const[0] .s32 %_global_block_offset[3];
.const[0] .s32 %_global_launch_offset[3];
.const[0] .s32 %_global_num_groups[3];
.const[0] .s32 %_global_size[3];
.const[0] .u32 %_work_dim;

/* Temporary variables for v2load/v4load/read */
.local .b8 vector_load_8[4];
.local .b16 vector_load_16[4];
.local .b32 vector_load_32[4];
.local .b64 vector_load_64[4];

/* Function Prototypes */

.global .texref test_param_0;
.const[0] .u32 test_param_0_imgInfo[5];
.global .texref test_param_1;
.const[0] .u32 test_param_1_imgInfo[5];
.const[0] .b32 %_dummy_const; /* needed to avoid an assert in driver */

/* Function Bodies */

/*
 * vec_add(out, a, b, count)
 *
 *   idx = gid(0) + gid(1) * %_global_size[0]
 *   if (idx < count)  out[idx] = a[idx] + b[idx]      (32-bit elements)
 *
 * gid(d) is computed as ntid.d * ctaid.d + tid.d + %_global_launch_offset[d].
 * The bounds test is unsigned (setp.lt.u32).
 */
.entry vec_add (
    .param .b32 vec_add_param_0,
    .param .b32 vec_add_param_1,
    .param .b32 vec_add_param_2,
    .param .u32 vec_add_param_3)
{
    .reg .b32 ptxbe_def_out;
    .reg .b32 ptxbe_def_a;
    .reg .b32 ptxbe_def_b;
    .reg .u32 ptxbe_def_count_addr4;
    /* Local variables */
    .reg .s32 ltmp_0_1;
    .reg .s32 ltmp_1_1;
    .reg .s32 ltmp_2_1;
    .reg .s32 ltmp_3_1;
    .reg .s32 ltmp_4_1;
    .reg .pred ltmp_5_2;
    .reg .b32 ltmp_6_3;
    .reg .b32 ltmp_7_3;
    .reg .s32 ltmp_8_1;
    .reg .b32 ltmp_9_3;
    .reg .s32 ltmp_10_1;
    .reg .s32 ltmp_11_1;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr; /* Temporary address */
    .reg .b32 %tmp_addr2; /* Temporary address */
    .reg .b32 %tmp_val; /* Temporary value */

    ld.param.b32 ptxbe_def_out, [vec_add_param_0];
    ld.param.b32 ptxbe_def_a, [vec_add_param_1];
    ld.param.b32 ptxbe_def_b, [vec_add_param_2];
    ld.param.u32 ptxbe_def_count_addr4, [vec_add_param_3];

    /* ptx_get_global_id */
    /* Dimension 0: the three predicates compare the constant dimension (0)
       with 0, 1 and 2, so only the .x reads below take effect. */
    .reg .u32 %intr_gid_0,%intr_gid_1,%intr_gid_2, %intr_gid_3,%intr_gid_4;
    .reg .pred %intr_gid_5,%intr_gid_6,%intr_gid_7;
    setp.eq.u32 %intr_gid_5,0,0;
    setp.eq.u32 %intr_gid_6,0,1;
    setp.eq.u32 %intr_gid_7,0,2;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_0,%tid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_0,%tid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_0,%tid.z;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_1,%ntid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_1,%ntid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_1,%ntid.z;
    @%intr_gid_5 cvt.u32.u16 %intr_gid_2,%ctaid.x;
    @%intr_gid_6 cvt.u32.u16 %intr_gid_2,%ctaid.y;
    @%intr_gid_7 cvt.u32.u16 %intr_gid_2,%ctaid.z;
    mad.lo.u32 ltmp_0_1,%intr_gid_1,%intr_gid_2,%intr_gid_0;
    /* add %_global_launch_offset[0] (byte offset 0 * 4) */
    mov.s32 %intr_gid_3,0;
    mul.lo.s32 %intr_gid_3,%intr_gid_3,4;
    mov.s32 %intr_gid_4,%_global_launch_offset;
    add.s32 %intr_gid_4,%intr_gid_3,%intr_gid_4;
    ld.const.s32 %intr_gid_3,[%intr_gid_4];
    add.s32 ltmp_0_1,ltmp_0_1,%intr_gid_3;
    /* end ptx_get_global_id */

    /* ptx_get_global_id */
    /* Dimension 1: same sequence, only the .y reads take effect. */
    .reg .u32 %intr_gid_8,%intr_gid_9,%intr_gid_10, %intr_gid_11,%intr_gid_12;
    .reg .pred %intr_gid_13,%intr_gid_14,%intr_gid_15;
    setp.eq.u32 %intr_gid_13,1,0;
    setp.eq.u32 %intr_gid_14,1,1;
    setp.eq.u32 %intr_gid_15,1,2;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_8,%tid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_8,%tid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_8,%tid.z;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_9,%ntid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_9,%ntid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_9,%ntid.z;
    @%intr_gid_13 cvt.u32.u16 %intr_gid_10,%ctaid.x;
    @%intr_gid_14 cvt.u32.u16 %intr_gid_10,%ctaid.y;
    @%intr_gid_15 cvt.u32.u16 %intr_gid_10,%ctaid.z;
    mad.lo.u32 ltmp_1_1,%intr_gid_9,%intr_gid_10,%intr_gid_8;
    /* add %_global_launch_offset[1] (byte offset 1 * 4) */
    mov.s32 %intr_gid_11,1;
    mul.lo.s32 %intr_gid_11,%intr_gid_11,4;
    mov.s32 %intr_gid_12,%_global_launch_offset;
    add.s32 %intr_gid_12,%intr_gid_11,%intr_gid_12;
    ld.const.s32 %intr_gid_11,[%intr_gid_12];
    add.s32 ltmp_1_1,ltmp_1_1,%intr_gid_11;
    /* end ptx_get_global_id */

    /* ptx_get_global_size_i */
    /* ltmp_2_1 = %_global_size[0] */
    .reg .s32 %intr_gdim_16,%intr_gdim_17;
    mul.lo.s32 %intr_gdim_16,0,4;
    mov.b32 %intr_gdim_17,%_global_size;
    add.s32 %intr_gdim_17,%intr_gdim_17,%intr_gdim_16;
    ld.const.s32 ltmp_2_1,[%intr_gdim_17];
    /* end ptx_get_global_size_i */

    /* ltmp_4_1 = gid(0) + gid(1) * global_size[0] */
    mul.lo.s32 ltmp_3_1,ltmp_1_1,ltmp_2_1;
    add.s32 ltmp_4_1,ltmp_0_1,ltmp_3_1;
    setp.lt.u32 ltmp_5_2,ltmp_4_1,ptxbe_def_count_addr4;
    @ltmp_5_2 bra ltmp_12_4;
    bra ltmp_13_4;

ltmp_12_4:
    /* In range: form the three element addresses (base + 4 * index). */
    mov.b32 ltmp_6_3, ptxbe_def_out;
    mad.lo.s32 ltmp_6_3, ltmp_4_1, 4, ltmp_6_3;
    mov.b32 ltmp_7_3, ptxbe_def_a;
    mad.lo.s32 ltmp_7_3, ltmp_4_1, 4, ltmp_7_3;
    ld.global.u32 ltmp_8_1, [ltmp_7_3+0];
    mov.b32 ltmp_9_3, ptxbe_def_b;
    mad.lo.s32 ltmp_9_3, ltmp_4_1, 4, ltmp_9_3;
    ld.global.u32 ltmp_10_1, [ltmp_9_3+0];
    add.s32 ltmp_11_1,ltmp_8_1,ltmp_10_1;
    st.global.u32 [ltmp_6_3+0],ltmp_11_1;
    ret;

ltmp_13_4:
    /* Out of range: nothing to do. */
    ret;
}

/*
 * sum(out, in)
 *
 *   tmp[id] = in[id]                        (id = local id, dimension 0)
 *   for stride in 32, 16, 8, 4, 2, 1:
 *       if (id < stride)  tmp[id] += tmp[id + stride]
 *   out[0] = tmp[0]
 *
 * tmp is shr_1_tmp (256 bytes = 64 words); the first step reads words
 * 0..63.  NOTE(review): presumably launched with 64 work-items per group
 * so that every word has been written - confirm against the host code.
 *
 * Synchronisation: a step reads words that other threads wrote in the
 * previous step, so a bar.sync separates consecutive steps, and one more
 * follows the initial store.  bar.sync waits for the whole CTA, which means
 * every thread has to execute every barrier.  The reduction work is
 * therefore predicated per instruction and the barriers themselves are
 * never predicated or branched around.  Restricting each step to
 * id < stride keeps the words read in a step (stride .. 2*stride-1)
 * disjoint from the words written in it (0 .. stride-1).
 */
.entry sum (
    .param .b32 sum_param_0,
    .param .b32 sum_param_1)
{
    .reg .b32 ptxbe_def_out;
    .reg .b32 ptxbe_def_in;
    /* Local variables */
    .reg .s32 ltmp_14_1;        /* local id, dimension 0 */
    .reg .b32 ltmp_15_5;        /* shared address of tmp[id] */
    .reg .b32 ltmp_16_3;        /* global address of in[id] */
    .reg .s32 ltmp_17_1;        /* value loaded from in[id] */
    .reg .s32 ltmp_50_1;        /* final tmp[0] */
    .reg .pred %red_active;     /* id < stride for the current step */
    .reg .s32 %red_lhs;         /* tmp[id] */
    .reg .s32 %red_rhs;         /* tmp[id + stride] */
    .reg .s32 %red_sum;         /* %red_lhs + %red_rhs */
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr; /* Temporary address */
    .reg .b32 %tmp_addr2; /* Temporary address */
    .reg .b32 %tmp_val; /* Temporary value */

    ld.param.b32 ptxbe_def_out, [sum_param_0];
    ld.param.b32 ptxbe_def_in, [sum_param_1];

    /* ptx_get_local_id */
    /* Dimension 0: only the %tid.x read takes effect. */
    .reg .pred %intr_lid_18,%intr_lid_19,%intr_lid_20;
    setp.eq.u32 %intr_lid_18,0,0;
    setp.eq.u32 %intr_lid_19,0,1;
    setp.eq.u32 %intr_lid_20,0,2;
    @%intr_lid_18 cvt.u32.u16 ltmp_14_1,%tid.x;
    @%intr_lid_19 cvt.u32.u16 ltmp_14_1,%tid.y;
    @%intr_lid_20 cvt.u32.u16 ltmp_14_1,%tid.z;
    /* end ptx_get_local_id */

    /* tmp[id] = in[id] */
    mov.b32 ltmp_15_5, shr_1_tmp;
    mad.lo.s32 ltmp_15_5, ltmp_14_1, 4, ltmp_15_5;
    mov.b32 ltmp_16_3, ptxbe_def_in;
    mad.lo.s32 ltmp_16_3, ltmp_14_1, 4, ltmp_16_3;
    ld.global.u32 ltmp_17_1, [ltmp_16_3+0];
    st.shared.u32 [ltmp_15_5+0],ltmp_17_1;
    bar.sync 0;                 /* every tmp[] word is stored before any is read */

    /* stride 32: partner word is 32 * 4 = 128 bytes above tmp[id] */
    setp.lt.s32 %red_active,ltmp_14_1,32;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+128];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;

    /* stride 16: partner at +64 bytes */
    setp.lt.s32 %red_active,ltmp_14_1,16;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+64];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;

    /* stride 8: partner at +32 bytes */
    setp.lt.s32 %red_active,ltmp_14_1,8;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+32];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;

    /* stride 4: partner at +16 bytes */
    setp.lt.s32 %red_active,ltmp_14_1,4;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+16];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;

    /* stride 2: partner at +8 bytes */
    setp.lt.s32 %red_active,ltmp_14_1,2;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+8];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;

    /* stride 1: partner at +4 bytes */
    setp.lt.s32 %red_active,ltmp_14_1,1;
    @%red_active ld.shared.u32 %red_lhs, [ltmp_15_5+0];
    @%red_active ld.shared.u32 %red_rhs, [ltmp_15_5+4];
    @%red_active add.s32 %red_sum,%red_lhs,%red_rhs;
    @%red_active st.shared.u32 [ltmp_15_5+0],%red_sum;
    bar.sync 0;                 /* tmp[0] is final for every thread */

    /* Every thread stores the same tmp[0] to out[0]. */
    mov.s32 %tmp_addr2, shr_1_tmp;
    ld.shared.u32 ltmp_50_1, [ %tmp_addr2 ];
    st.global.u32 [ptxbe_def_out+0],ltmp_50_1;
    ret;
}

/*
 * test(out)
 *
 * With id = local id (dimension 0), y = id / 4, x0 = (4 * id) & 12:
 *   for k in 0..3:
 *       texel = tex2d(test_param_0, sampler, (float)(x0 | k), (float)y)
 *       shr_2_buf1[(4 * id) | k] = (u8) floor(texel.x * 255.0 + 0.5)
 *   out[0] = shr_2_buf1[0]
 *   out[1] = shr_3_buf2[1]
 *
 * The four texel sections are identical apart from k and register names.
 * 0d406FE00000000000 is 255.0 and 0d3FE0000000000000 is 0.5.
 *
 * NOTE(review): `sam_sampler` is loaded from const bank 0 but is not
 * declared in this listing (only %sam_sampler_norm_0 is) - presumably it is
 * supplied outside this file; confirm.
 * NOTE(review): shr_2_buf1[0] is read by every thread with no barrier after
 * the stores, and shr_3_buf2 is read without ever being written here -
 * confirm that this is what the kernel is meant to exercise.
 */
.entry test (
    .param .b32 test_param_2)
{
    .reg .b32 ptxbe_def_out;
    /* Local variables */
    .local .s32 ptxbe_sam_sampler_cbank; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i23_i; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i13_i; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i3_i; /* Address-exposed local */
    .local .s32 ptxbe_sam_sampler_addr_i_i_i; /* Address-exposed local */
    .reg .s32 ltmp_54_1;
    .reg .s32 ltmp_55_1;
    .reg .s32 ltmp_56_1;
    .reg .s32 ltmp_57_1;
    .reg .b32 ltmp_58_6;
    .reg .s32 ltmp_59_1;
    .reg .u32 ltmp_60_1;
    .reg .s32 ltmp_61_1;
    .reg .f32 %vec_ltmp_62_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_62_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_62_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_62_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp;
    .reg .f32 ltmp_63_8;
    .reg .f32 ltmp_64_8;
    .reg .f32 ltmp_65_8;
    .reg .u8 ltmp_66_9;
    .reg .s32 ltmp_67_1;
    .reg .s32 ltmp_68_1;
    .reg .b32 ltmp_69_6;
    .reg .s32 ltmp_70_1;
    .reg .s32 ltmp_71_1;
    .reg .f32 %vec_ltmp_72_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_72_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_72_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_72_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp18;
    .reg .f32 ltmp_73_8;
    .reg .f32 ltmp_74_8;
    .reg .f32 ltmp_75_8;
    .reg .u8 ltmp_76_9;
    .reg .s32 ltmp_77_1;
    .reg .s32 ltmp_78_1;
    .reg .b32 ltmp_79_6;
    .reg .s32 ltmp_80_1;
    .reg .s32 ltmp_81_1;
    .reg .f32 %vec_ltmp_82_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_82_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_82_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_82_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp15;
    .reg .f32 ltmp_83_8;
    .reg .f32 ltmp_84_8;
    .reg .f32 ltmp_85_8;
    .reg .u8 ltmp_86_9;
    .reg .s32 ltmp_87_1;
    .reg .s32 ltmp_88_1;
    .reg .b32 ltmp_89_6;
    .reg .s32 ltmp_90_1;
    .reg .s32 ltmp_91_1;
    .reg .f32 %vec_ltmp_92_7_0; /* Expanded vector register */
    .reg .f32 %vec_ltmp_92_7_1; /* Expanded vector register */
    .reg .f32 %vec_ltmp_92_7_2; /* Expanded vector register */
    .reg .f32 %vec_ltmp_92_7_3; /* Expanded vector register */
    .reg .f32 ptxbe_tmp12;
    .reg .f32 ltmp_93_8;
    .reg .f32 ltmp_94_8;
    .reg .f32 ltmp_95_8;
    .reg .u8 ltmp_96_9;
    .reg .s8 ltmp_97_9;
    .reg .b32 ltmp_98_10;
    .reg .s8 ltmp_99_9;
    .reg .s32 %gep_offset; /* Temporary offset for GEP calculations */
    .reg .b32 %tmp_addr; /* Temporary address */
    .reg .b32 %tmp_addr2; /* Temporary address */
    .reg .b32 %tmp_val; /* Temporary value */

    ld.param.b32 ptxbe_def_out, [test_param_2];
    /* zero the local sampler slot */
    .reg .u32 %store_0;
    mov.u32 %store_0,0;
    st.local.u32 [ptxbe_sam_sampler_cbank+0],%store_0;

    /* ptx_get_local_id */
    /* Dimension 0: only the %tid.x read takes effect. */
    .reg .pred %intr_lid_21,%intr_lid_22,%intr_lid_23;
    setp.eq.u32 %intr_lid_21,0,0;
    setp.eq.u32 %intr_lid_22,0,1;
    setp.eq.u32 %intr_lid_23,0,2;
    @%intr_lid_21 cvt.u32.u16 ltmp_54_1,%tid.x;
    @%intr_lid_22 cvt.u32.u16 ltmp_54_1,%tid.y;
    @%intr_lid_23 cvt.u32.u16 ltmp_54_1,%tid.z;
    /* end ptx_get_local_id */

    div.s32 ltmp_55_1,ltmp_54_1,4;      /* y  = id / 4 */
    shl.b32 ltmp_56_1,ltmp_54_1,2;      /* 4 * id: first byte slot in shr_2_buf1 */
    and.b32 ltmp_57_1,ltmp_56_1,12;     /* x0 = (4 * id) & 12 */

    /* ---- texel 0: x = x0, slot = 4 * id ---- */
    mov.b32 ltmp_58_6, shr_2_buf1;
    mad.lo.s32 ltmp_58_6, ltmp_56_1, 1, ltmp_58_6;
    /* sampler word: loaded, spilled to its local slot and reloaded */
    ld.const[0].b32 ltmp_59_1, [sam_sampler];
    st.local.u32 [ptxbe_sam_sampler_addr_i_i_i+0],ltmp_59_1;
    ld.local.u32 ltmp_61_1, [ptxbe_sam_sampler_addr_i_i_i+0];
    .reg .f32 %intr_w_f_29, %intr_h_f_28;
    .reg .s32 %intr_cord_25, %intr_cord_24;
    mov.s32 %intr_cord_25, ltmp_57_1;
    mov.s32 %intr_cord_24, ltmp_55_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_29, %intr_cord_25;
    cvt.rz.ftz.f32.s32 %intr_h_f_28, %intr_cord_24;
    tex.2d.v4.f32.f32 { %vec_ltmp_62_7_0, %vec_ltmp_62_7_1, %vec_ltmp_62_7_2, %vec_ltmp_62_7_3 }, [test_param_0, ltmp_61_1, {%intr_w_f_29, %intr_h_f_28}];
    /* first component -> u8: floor(v * 255.0 + 0.5) */
    mov.f32 ptxbe_tmp,%vec_ltmp_62_7_0;
    mul.rn.ftz.f32 ltmp_63_8,ptxbe_tmp,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_64_8,ltmp_63_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_65_8,ltmp_64_8;
    cvt.u8.f32.rzi ltmp_66_9,ltmp_65_8;
    st.shared.u8 [ltmp_58_6+0],ltmp_66_9;

    /* ---- texel 1: x = x0 | 1, slot = (4 * id) | 1 ---- */
    or.b32 ltmp_67_1,ltmp_57_1,1;
    or.b32 ltmp_68_1,ltmp_56_1,1;
    mov.b32 ltmp_69_6, shr_2_buf1;
    mad.lo.s32 ltmp_69_6, ltmp_68_1, 1, ltmp_69_6;
    ld.const[0].b32 ltmp_70_1, [sam_sampler];
    st.local.u32 [ptxbe_sam_sampler_addr_i_i23_i+0],ltmp_70_1;
    ld.local.u32 ltmp_71_1, [ptxbe_sam_sampler_addr_i_i23_i+0];
    .reg .f32 %intr_w_f_37, %intr_h_f_36;
    .reg .s32 %intr_cord_33, %intr_cord_32;
    mov.s32 %intr_cord_33, ltmp_67_1;
    mov.s32 %intr_cord_32, ltmp_55_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_37, %intr_cord_33;
    cvt.rz.ftz.f32.s32 %intr_h_f_36, %intr_cord_32;
    tex.2d.v4.f32.f32 { %vec_ltmp_72_7_0, %vec_ltmp_72_7_1, %vec_ltmp_72_7_2, %vec_ltmp_72_7_3 }, [test_param_0, ltmp_71_1, {%intr_w_f_37, %intr_h_f_36}];
    mov.f32 ptxbe_tmp18,%vec_ltmp_72_7_0;
    mul.rn.ftz.f32 ltmp_73_8,ptxbe_tmp18,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_74_8,ltmp_73_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_75_8,ltmp_74_8;
    cvt.u8.f32.rzi ltmp_76_9,ltmp_75_8;
    st.shared.u8 [ltmp_69_6+0],ltmp_76_9;

    /* ---- texel 2: x = x0 | 2, slot = (4 * id) | 2 ---- */
    or.b32 ltmp_77_1,ltmp_57_1,2;
    or.b32 ltmp_78_1,ltmp_56_1,2;
    mov.b32 ltmp_79_6, shr_2_buf1;
    mad.lo.s32 ltmp_79_6, ltmp_78_1, 1, ltmp_79_6;
    ld.const[0].b32 ltmp_80_1, [sam_sampler];
    st.local.u32 [ptxbe_sam_sampler_addr_i_i13_i+0],ltmp_80_1;
    ld.local.u32 ltmp_81_1, [ptxbe_sam_sampler_addr_i_i13_i+0];
    .reg .f32 %intr_w_f_45, %intr_h_f_44;
    .reg .s32 %intr_cord_41, %intr_cord_40;
    mov.s32 %intr_cord_41, ltmp_77_1;
    mov.s32 %intr_cord_40, ltmp_55_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_45, %intr_cord_41;
    cvt.rz.ftz.f32.s32 %intr_h_f_44, %intr_cord_40;
    tex.2d.v4.f32.f32 { %vec_ltmp_82_7_0, %vec_ltmp_82_7_1, %vec_ltmp_82_7_2, %vec_ltmp_82_7_3 }, [test_param_0, ltmp_81_1, {%intr_w_f_45, %intr_h_f_44}];
    mov.f32 ptxbe_tmp15,%vec_ltmp_82_7_0;
    mul.rn.ftz.f32 ltmp_83_8,ptxbe_tmp15,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_84_8,ltmp_83_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_85_8,ltmp_84_8;
    cvt.u8.f32.rzi ltmp_86_9,ltmp_85_8;
    st.shared.u8 [ltmp_79_6+0],ltmp_86_9;

    /* ---- texel 3: x = x0 | 3, slot = (4 * id) | 3 ---- */
    or.b32 ltmp_87_1,ltmp_57_1,3;
    or.b32 ltmp_88_1,ltmp_56_1,3;
    mov.b32 ltmp_89_6, shr_2_buf1;
    mad.lo.s32 ltmp_89_6, ltmp_88_1, 1, ltmp_89_6;
    ld.const[0].b32 ltmp_90_1, [sam_sampler];
    st.local.u32 [ptxbe_sam_sampler_addr_i_i3_i+0],ltmp_90_1;
    ld.local.u32 ltmp_91_1, [ptxbe_sam_sampler_addr_i_i3_i+0];
    .reg .f32 %intr_w_f_53, %intr_h_f_52;
    .reg .s32 %intr_cord_49, %intr_cord_48;
    mov.s32 %intr_cord_49, ltmp_87_1;
    mov.s32 %intr_cord_48, ltmp_55_1;
    cvt.rz.ftz.f32.s32 %intr_w_f_53, %intr_cord_49;
    cvt.rz.ftz.f32.s32 %intr_h_f_52, %intr_cord_48;
    tex.2d.v4.f32.f32 { %vec_ltmp_92_7_0, %vec_ltmp_92_7_1, %vec_ltmp_92_7_2, %vec_ltmp_92_7_3 }, [test_param_0, ltmp_91_1, {%intr_w_f_53, %intr_h_f_52}];
    mov.f32 ptxbe_tmp12,%vec_ltmp_92_7_0;
    mul.rn.ftz.f32 ltmp_93_8,ptxbe_tmp12,0d406FE00000000000;
    add.rn.ftz.f32 ltmp_94_8,ltmp_93_8,0d3FE0000000000000;
    cvt.rmi.f32.f32 ltmp_95_8,ltmp_94_8;
    cvt.u8.f32.rzi ltmp_96_9,ltmp_95_8;
    st.shared.u8 [ltmp_89_6+0],ltmp_96_9;

    /* out[0] = shr_2_buf1[0] */
    mov.s32 %tmp_addr2, shr_2_buf1;
    ld.shared.u8 ltmp_97_9, [ %tmp_addr2 ];
    st.global.u8 [ptxbe_def_out+0],ltmp_97_9;

    /* out[1] = shr_3_buf2[1] */
    mov.b32 ltmp_98_10, ptxbe_def_out;
    add.s32 ltmp_98_10, ltmp_98_10, 1;
    mov.s32 %tmp_addr2, shr_3_buf2;
    add.s32 %tmp_addr2, %tmp_addr2, 1;
    ld.shared.u8 ltmp_99_9, [ %tmp_addr2 ];
    st.global.u8 [ltmp_98_10+0],ltmp_99_9;
    ret;
}