.version 1.4
.target sm_13
// compiled with /usr/local/cuda/open64/lib//be
// nvopencc 2.3 built on 2009-07-30

//-----------------------------------------------------------
// Compiling sawtrimer.cpp3.i (/tmp/ccBI#.Uz19Jo)
//-----------------------------------------------------------

//-----------------------------------------------------------
// Options:
//-----------------------------------------------------------
//  Target:ptx, ISA:sm_13, Endian:little, Pointer Size:64
//  -O3 (Optimization level)
//  -g0 (Debug level)
//  -m2 (Report advisories)
//-----------------------------------------------------------

// Source-file table referenced by the `.loc <file> <line> <col>` directives
// below.  File 16 is the kernel source, file 15 the atomics header.
.file 1 "sawtrimer.cudafe2.gpu"
.file 2 "../src/saw.h"
.file 3 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h"
.file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
.file 5 "/usr/local/cuda/bin/../include/host_defines.h"
.file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
.file 7 "/usr/local/cuda/bin/../include/device_types.h"
.file 8 "/usr/local/cuda/bin/../include/driver_types.h"
.file 9 "/usr/local/cuda/bin/../include/texture_types.h"
.file 10 "/usr/local/cuda/bin/../include/vector_types.h"
.file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
.file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
.file 13 "/usr/include/bits/types.h"
.file 14 "/usr/include/time.h"
.file 15 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
.file 16 "../src/sawtrimer.cu"
.file 17 "/usr/local/cuda/bin/../include/common_functions.h"
.file 18 "/usr/local/cuda/bin/../include/crt/func_macro.h"
.file 19 "/usr/local/cuda/bin/../include/math_functions.h"
.file 20 "/usr/local/cuda/bin/../include/device_functions.h"
.file 21 "/usr/local/cuda/bin/../include/math_constants.h"
.file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
.file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
.file 24 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
.file 25 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx3.h"

// 96-byte module-scope parameter block.  The kernel takes no formal
// parameters and reads every input from here.
.global .align 8 .b8 kernelParams[96];

//---------------------------------------------------------------------------
// _Z10saw_trimerv  (kernel entry, no formal parameters)
//
// kernelParams fields read at entry (byte offset -> register):
//     +0   s32 -> %r1
//     +24  s32 -> %r4    step count; bounds the replay loop and the final test
//     +56  s32 -> %r3    when zero, the step list is loaded from global memory
//     +88  s32 -> %r2    a claimed index equal to this value ends the thread
//     +8   u64 -> %rd1   base of the u64 array the result is stored into
//     +40  u64 -> %rd2   base of an s32 array indexed by the claimed index
//     +80  u64 -> %rd3   address of an s32 counter updated with atom.add
//
// Per-thread .local storage:
//     __cuda___cuda_steps_1696    192 bytes, accessed as s32 (%rd4 = base)
//     __cuda___cuda_sites_208288  128 bytes, accessed as s16 (%rd5 = base)
//
// Overall shape: one outer loop at $Lt_0_26370 controlled by %r6.
//     %r6 == 1 : claim a new index via the atomic counter and reset state.
//     %r6 == 0 : resume the current item (state is carried in registers).
// The thread leaves through $Lt_0_514 (exit).
//
// NOTE(review): %rh43 (read at $L_0_38146) and %rh48 (read at .loc 583) are
// not written anywhere on the %r6 == 1 path; their only writes are at
// .loc 594/690 and .loc 686.  On the first pass the %rh43 comparison is ANDed
// with a zero term, so it cannot change the branch there; confirm against
// sawtrimer.cu whether later reads can see an unset value.
//
// NOTE(review): every probe loop over the sites table ("Loop body line 72")
// stops only on a zero slot or a matching value; no instruction compares the
// index %r77 with the 64-entry capacity of the array.
//---------------------------------------------------------------------------
.entry _Z10saw_trimerv {
    .reg .u16 %rh<61>;
    .reg .u32 %rv1;
    .reg .u32 %r<235>;
    .reg .u64 %rd<53>;
    .reg .pred %p<44>;
    .local .align 4 .b8 __cuda___cuda_steps_1696[192];
    .local .align 2 .b8 __cuda___cuda_sites_208288[128];
    .loc 16 285 0
$LBB1__Z10saw_trimerv:
    // ---- Load the kernel inputs from kernelParams -----------------------
    .loc 16 331 0
    ld.global.s32 %r1, [kernelParams+0];
    .loc 16 332 0
    ld.global.s32 %r2, [kernelParams+88];
    .loc 16 333 0
    ld.global.s32 %r3, [kernelParams+56];
    .loc 16 334 0
    ld.global.s32 %r4, [kernelParams+24];
    .loc 16 335 0
    ld.global.u64 %rd1, [kernelParams+8];
    .loc 16 336 0
    ld.global.u64 %rd2, [kernelParams+40];
    .loc 16 338 0
    ld.global.u64 %rd3, [kernelParams+80];
    .loc 16 392 0
    cvt.s8.s32 %r5, %r4;                    // %r5 = %r4 narrowed to signed 8 bits
    mov.s32 %r6, 1;                         // first pass takes the new-item path
    mov.u64 %rd4, __cuda___cuda_steps_1696; // %rd4 = &steps[0]
    mov.u64 %rd5, __cuda___cuda_sites_208288; // %rd5 = &sites[0]

$Lt_0_26370:
    // Loop body line 395 (outer loop head)
    .loc 16 395 0
    mov.u32 %r7, 0;
    setp.eq.s32 %p1, %r6, %r7;
    @%p1 bra $Lt_0_39170;                   // %r6 == 0: skip the reset, resume item

    // ---- New-item path: reset the per-item state ------------------------
    .loc 16 400 0
    mov.s16 %rh1, 0;
    mov.s16 %rh2, 0;
    .loc 16 409 0
    mov.u64 %rd6, 0;                        // initial value of the u64 accumulator
    .loc 16 412 0
    mov.s32 %r8, 1;
    .loc 16 413 0
    mov.s32 %r9, %r1;
    .loc 16 414 0
    mov.s32 %r10, 1;
    .loc 16 415 0
    mov.s32 %r11, 0;
    .loc 16 416 0
    mov.s32 %r12, %r5;

    // Zero sites[0..31] (byte offsets 0..62).
    // NOTE(review): the array is 128 bytes; offsets 64..127 are not cleared here.
    .loc 16 422 0
    mov.s16 %rh3, 0;    st.local.s16 [__cuda___cuda_sites_208288+0], %rh3;
    mov.s16 %rh4, 0;    st.local.s16 [__cuda___cuda_sites_208288+2], %rh4;
    mov.s16 %rh5, 0;    st.local.s16 [__cuda___cuda_sites_208288+4], %rh5;
    mov.s16 %rh6, 0;    st.local.s16 [__cuda___cuda_sites_208288+6], %rh6;
    mov.s16 %rh7, 0;    st.local.s16 [__cuda___cuda_sites_208288+8], %rh7;
    mov.s16 %rh8, 0;    st.local.s16 [__cuda___cuda_sites_208288+10], %rh8;
    mov.s16 %rh9, 0;    st.local.s16 [__cuda___cuda_sites_208288+12], %rh9;
    mov.s16 %rh10, 0;   st.local.s16 [__cuda___cuda_sites_208288+14], %rh10;
    mov.s16 %rh11, 0;   st.local.s16 [__cuda___cuda_sites_208288+16], %rh11;
    mov.s16 %rh12, 0;   st.local.s16 [__cuda___cuda_sites_208288+18], %rh12;
    mov.s16 %rh13, 0;   st.local.s16 [__cuda___cuda_sites_208288+20], %rh13;
    mov.s16 %rh14, 0;   st.local.s16 [__cuda___cuda_sites_208288+22], %rh14;
    mov.s16 %rh15, 0;   st.local.s16 [__cuda___cuda_sites_208288+24], %rh15;
    mov.s16 %rh16, 0;   st.local.s16 [__cuda___cuda_sites_208288+26], %rh16;
    mov.s16 %rh17, 0;   st.local.s16 [__cuda___cuda_sites_208288+28], %rh17;
    mov.s16 %rh18, 0;   st.local.s16 [__cuda___cuda_sites_208288+30], %rh18;
    mov.s16 %rh19, 0;   st.local.s16 [__cuda___cuda_sites_208288+32], %rh19;
    mov.s16 %rh20, 0;   st.local.s16 [__cuda___cuda_sites_208288+34], %rh20;
    mov.s16 %rh21, 0;   st.local.s16 [__cuda___cuda_sites_208288+36], %rh21;
    mov.s16 %rh22, 0;   st.local.s16 [__cuda___cuda_sites_208288+38], %rh22;
    mov.s16 %rh23, 0;   st.local.s16 [__cuda___cuda_sites_208288+40], %rh23;
    mov.s16 %rh24, 0;   st.local.s16 [__cuda___cuda_sites_208288+42], %rh24;
    mov.s16 %rh25, 0;   st.local.s16 [__cuda___cuda_sites_208288+44], %rh25;
    mov.s16 %rh26, 0;   st.local.s16 [__cuda___cuda_sites_208288+46], %rh26;
    mov.s16 %rh27, 0;   st.local.s16 [__cuda___cuda_sites_208288+48], %rh27;
    mov.s16 %rh28, 0;   st.local.s16 [__cuda___cuda_sites_208288+50], %rh28;
    mov.s16 %rh29, 0;   st.local.s16 [__cuda___cuda_sites_208288+52], %rh29;
    mov.s16 %rh30, 0;   st.local.s16 [__cuda___cuda_sites_208288+54], %rh30;
    mov.s16 %rh31, 0;   st.local.s16 [__cuda___cuda_sites_208288+56], %rh31;
    mov.s16 %rh32, 0;   st.local.s16 [__cuda___cuda_sites_208288+58], %rh32;
    mov.s16 %rh33, 0;   st.local.s16 [__cuda___cuda_sites_208288+60], %rh33;
    mov.s16 %rh34, 0;   st.local.s16 [__cuda___cuda_sites_208288+62], %rh34;

    // Zero steps[0..23] (byte offsets 0..92).
    // NOTE(review): the array is 192 bytes; offsets 96..191 are not cleared here.
    .loc 16 428 0
    mov.s32 %r13, 0;    st.local.s32 [__cuda___cuda_steps_1696+0], %r13;
    mov.s32 %r14, 0;    st.local.s32 [__cuda___cuda_steps_1696+4], %r14;
    mov.s32 %r15, 0;    st.local.s32 [__cuda___cuda_steps_1696+8], %r15;
    mov.s32 %r16, 0;    st.local.s32 [__cuda___cuda_steps_1696+12], %r16;
    mov.s32 %r17, 0;    st.local.s32 [__cuda___cuda_steps_1696+16], %r17;
    mov.s32 %r18, 0;    st.local.s32 [__cuda___cuda_steps_1696+20], %r18;
    mov.s32 %r19, 0;    st.local.s32 [__cuda___cuda_steps_1696+24], %r19;
    mov.s32 %r20, 0;    st.local.s32 [__cuda___cuda_steps_1696+28], %r20;
    mov.s32 %r21, 0;    st.local.s32 [__cuda___cuda_steps_1696+32], %r21;
    mov.s32 %r22, 0;    st.local.s32 [__cuda___cuda_steps_1696+36], %r22;
    mov.s32 %r23, 0;    st.local.s32 [__cuda___cuda_steps_1696+40], %r23;
    mov.s32 %r24, 0;    st.local.s32 [__cuda___cuda_steps_1696+44], %r24;
    mov.s32 %r25, 0;    st.local.s32 [__cuda___cuda_steps_1696+48], %r25;
    mov.s32 %r26, 0;    st.local.s32 [__cuda___cuda_steps_1696+52], %r26;
    mov.s32 %r27, 0;    st.local.s32 [__cuda___cuda_steps_1696+56], %r27;
    mov.s32 %r28, 0;    st.local.s32 [__cuda___cuda_steps_1696+60], %r28;
    mov.s32 %r29, 0;    st.local.s32 [__cuda___cuda_steps_1696+64], %r29;
    mov.s32 %r30, 0;    st.local.s32 [__cuda___cuda_steps_1696+68], %r30;
    mov.s32 %r31, 0;    st.local.s32 [__cuda___cuda_steps_1696+72], %r31;
    mov.s32 %r32, 0;    st.local.s32 [__cuda___cuda_steps_1696+76], %r32;
    mov.s32 %r33, 0;    st.local.s32 [__cuda___cuda_steps_1696+80], %r33;
    mov.s32 %r34, 0;    st.local.s32 [__cuda___cuda_steps_1696+84], %r34;
    mov.s32 %r35, 0;    st.local.s32 [__cuda___cuda_steps_1696+88], %r35;
    mov.s32 %r36, 0;    st.local.s32 [__cuda___cuda_steps_1696+92], %r36;

    // ---- Claim the next index from the global counter -------------------
    .loc 16 436 0
    mov.s32 %r37, 1;
    atom.global.add.s32 %rv1, [%rd3], %r37; // %rv1 = old counter value; counter += 1
    mov.s32 %r38, %rv1;                     // %r38 = claimed index
    mov.s32 %r39, %r38;
    cvt.u64.s32 %rd7, %r38;
    mul.lo.u64 %rd8, %rd7, 4;
    add.u64 %rd9, %rd2, %rd8;
    ld.global.s32 %r40, [%rd9+0];           // first s32 stored for the claimed index
    mov.u32 %r41, 0;
    setp.eq.s32 %p2, %r40, %r41;
    @%p2 bra $L_0_37122;                    // value is zero: give the index back and exit
    setp.ne.s32 %p3, %r38, %r2;
    @%p3 bra $L_0_36866;                    // index != %r2: keep going with this item
$L_0_37122:
    // Undo the claim (counter += -1) and leave the kernel.
    .loc 15 103 0
    mov.s32 %r42, -1;
    atom.global.add.s32 %rv1, [%rd3], %r42;
    .loc 16 436 0
    bra.uni $Lt_0_514;

$L_0_36866:
    .loc 16 443 0
    mov.s32 %r43, 0;
    setp.eq.s32 %p4, %r3, %r43;             // %p4 = (%r3 == 0)
    @!%p4 bra $Lt_0_39426;                  // %r3 != 0: leave steps[] zeroed

    // ---- %r3 == 0: copy 24 s32 values for this index into steps[0..23] --
    // Successive values are 40000000 bytes apart in global memory
    // (10,000,000 s32 elements per stride).
    .loc 16 452 0
    cvt.s64.s32 %rd10, %r38;
    mul.lo.u64 %rd11, %rd10, 4;
    add.u64 %rd12, %rd2, %rd11;
    ld.global.s32 %r44, [%rd12+0];          st.local.s32 [__cuda___cuda_steps_1696+0], %r44;
    ld.global.s32 %r45, [%rd12+40000000];   st.local.s32 [__cuda___cuda_steps_1696+4], %r45;
    ld.global.s32 %r46, [%rd12+80000000];   st.local.s32 [__cuda___cuda_steps_1696+8], %r46;
    ld.global.s32 %r47, [%rd12+120000000];  st.local.s32 [__cuda___cuda_steps_1696+12], %r47;
    ld.global.s32 %r48, [%rd12+160000000];  st.local.s32 [__cuda___cuda_steps_1696+16], %r48;
    ld.global.s32 %r49, [%rd12+200000000];  st.local.s32 [__cuda___cuda_steps_1696+20], %r49;
    ld.global.s32 %r50, [%rd12+240000000];  st.local.s32 [__cuda___cuda_steps_1696+24], %r50;
    ld.global.s32 %r51, [%rd12+280000000];  st.local.s32 [__cuda___cuda_steps_1696+28], %r51;
    ld.global.s32 %r52, [%rd12+320000000];  st.local.s32 [__cuda___cuda_steps_1696+32], %r52;
    ld.global.s32 %r53, [%rd12+360000000];  st.local.s32 [__cuda___cuda_steps_1696+36], %r53;
    ld.global.s32 %r54, [%rd12+400000000];  st.local.s32 [__cuda___cuda_steps_1696+40], %r54;
    ld.global.s32 %r55, [%rd12+440000000];  st.local.s32 [__cuda___cuda_steps_1696+44], %r55;
    ld.global.s32 %r56, [%rd12+480000000];  st.local.s32 [__cuda___cuda_steps_1696+48], %r56;
    ld.global.s32 %r57, [%rd12+520000000];  st.local.s32 [__cuda___cuda_steps_1696+52], %r57;
    ld.global.s32 %r58, [%rd12+560000000];  st.local.s32 [__cuda___cuda_steps_1696+56], %r58;
    ld.global.s32 %r59, [%rd12+600000000];  st.local.s32 [__cuda___cuda_steps_1696+60], %r59;
    ld.global.s32 %r60, [%rd12+640000000];  st.local.s32 [__cuda___cuda_steps_1696+64], %r60;
    ld.global.s32 %r61, [%rd12+680000000];  st.local.s32 [__cuda___cuda_steps_1696+68], %r61;
    ld.global.s32 %r62, [%rd12+720000000];  st.local.s32 [__cuda___cuda_steps_1696+72], %r62;
    ld.global.s32 %r63, [%rd12+760000000];  st.local.s32 [__cuda___cuda_steps_1696+76], %r63;
    ld.global.s32 %r64, [%rd12+800000000];  st.local.s32 [__cuda___cuda_steps_1696+80], %r64;
    ld.global.s32 %r65, [%rd12+840000000];  st.local.s32 [__cuda___cuda_steps_1696+84], %r65;
    ld.global.s32 %r66, [%rd12+880000000];  st.local.s32 [__cuda___cuda_steps_1696+88], %r66;
    ld.global.s32 %r67, [%rd12+920000000];  st.local.s32 [__cuda___cuda_steps_1696+92], %r67;

$Lt_0_39426:
    .loc 16 461 0
    sub.s32 %r68, %r4, 1;                   // %r68 = index of the last step
    // %r71 = 1 only when %r3 == 0 AND steps[%r68] == 0; in that case exit.
    .loc 16 470 0
    @!%p4 bra $Lt_0_51970;
    cvt.u64.s32 %rd13, %r68;
    mul.lo.u64 %rd14, %rd13, 4;
    add.u64 %rd15, %rd4, %rd14;
    ld.local.s32 %r69, [%rd15+0];
    mov.u32 %r70, 0;
    setp.ne.s32 %p5, %r69, %r70;
    @%p5 bra $Lt_0_51970;
    mov.s32 %r71, 1;
    bra.uni $L_0_37378;
$Lt_0_51970:
$L_0_37634:
    mov.s32 %r71, 0;
$L_0_37378:
    mov.u32 %r72, 0;
    setp.ne.s32 %p6, %r71, %r72;
    @%p6 bra $Lt_0_514;

    // ---- Move the freshly reset state into its loop-carried registers ---
    .loc 16 477 0
    cvt.s8.s32 %rh35, %r9;
    mov.s16 %rh36, %rh2;
    mov.s16 %rh37, %rh1;
    mov.s32 %r73, %r68;                     // %r73 = current index into steps[]
    cvt.s8.s32 %rh38, %r11;
    cvt.s8.s32 %rh39, %r10;
    mov.s32 %r74, %r39;                     // %r74 = claimed index, used for the final store
    mov.s64 %rd16, %rd6;                    // %rd16 = u64 accumulator
    cvt.s8.s32 %rh40, %r8;
    cvt.s8.s32 %rh41, %r12;
    // Both running positions start at 16912 (= 16*1024 + 16*32 + 16).
    // NOTE(review): .loc 663 splits these values with >>10, >>5 and mod 32,
    // so this presumably packs three 5-bit coordinates -- confirm in saw.h.
    mov.s32 %r75, 16912;
    mov.s32 %r76, 16912;

    // Probe sites[] for the first slot holding 0 or 16912.
    .loc 16 70 0
    mov.s32 %r77, 0;
$Lt_0_29186:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd17, %r77;
    mul.lo.u64 %rd18, %rd17, 2;
    add.u64 %rd19, %rd5, %rd18;             // %rd19 = &sites[%r77]
    ld.local.s16 %r78, [%rd19+0];
    mov.u32 %r79, 0;
    setp.eq.s32 %p7, %r78, %r79;
    @%p7 bra $Lt_0_52738;                   // empty slot
    .loc 16 73 0
    mov.u32 %r80, 16912;
    setp.eq.s32 %p8, %r78, %r80;
    @%p8 bra $Lt_0_52738;                   // slot already holds 16912
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_29186;
$Lt_0_52738:
$Lt_0_2050:
    .loc 16 479 0
    mov.s16 %rh42, 16912;
    st.local.s16 [%rd19+0], %rh42;          // record the start value in the table
    mov.u32 %r81, 0;
    setp.eq.s32 %p9, %r4, %r81;
    @%p9 bra $Lt_0_40194;                   // %r4 == 0: nothing to replay
    mov.u32 %r82, 0;
    setp.le.s32 %p10, %r4, %r82;
    @%p10 bra $Lt_0_53250;                  // %r4 <= 0: nothing to replay
    mov.s32 %r83, %r4;
    mov.u64 %rd20, __cuda___cuda_steps_1696; // %rd20 walks steps[]
    mov.s32 %r84, 0;                        // %r84 = step index
    mov.s32 %r85, %r83;

    // ---- Replay steps[0 .. %r4-1] ---------------------------------------
    // Even step indices advance %r75, odd ones advance %r76; each new value
    // (narrowed to s16) is written into the first zero-or-equal sites[] slot.
$Lt_0_40962:
    // Loop body line 479, nesting depth: 1, estimated iterations: unknown
    ld.local.s32 %r86, [%rd20+0];           // %r86 = steps[%r84]
    and.b32 %r87, %r84, 1;
    mov.u32 %r88, 0;
    setp.ne.u32 %p11, %r87, %r88;           // %p11 = step index is odd
    .loc 16 70 0
    mov.s32 %r77, 0;
    .loc 16 479 0
    @%p11 bra $Lt_0_41474;
    // even index
    .loc 16 500 0
    add.s32 %r75, %r86, %r75;
    .loc 16 70 0
    cvt.s16.s32 %r89, %r75;
$Lt_0_30466:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd21, %r77;
    mul.lo.u64 %rd22, %rd21, 2;
    add.u64 %rd19, %rd5, %rd22;
    ld.local.s16 %r78, [%rd19+0];
    mov.u32 %r90, 0;
    setp.eq.s32 %p12, %r78, %r90;
    @%p12 bra $Lt_0_53506;
    .loc 16 73 0
    setp.eq.s32 %p13, %r89, %r78;
    @%p13 bra $Lt_0_53506;
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_30466;
$Lt_0_53506:
$Lt_0_1794:
    .loc 16 503 0
    st.local.s16 [%rd19+0], %r89;
    bra.uni $Lt_0_41218;
$Lt_0_41474:
    // odd index
    .loc 16 508 0
    add.s32 %r76, %r86, %r76;
    .loc 16 70 0
    cvt.s16.s32 %r91, %r76;
$Lt_0_30722:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd23, %r77;
    mul.lo.u64 %rd24, %rd23, 2;
    add.u64 %rd19, %rd5, %rd24;
    ld.local.s16 %r78, [%rd19+0];
    mov.u32 %r92, 0;
    setp.eq.s32 %p14, %r78, %r92;
    @%p14 bra $Lt_0_54018;
    .loc 16 73 0
    setp.eq.s32 %p15, %r91, %r78;
    @%p15 bra $Lt_0_54018;
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_30722;
$Lt_0_54018:
$Lt_0_1538:
    .loc 16 511 0
    st.local.s16 [%rd19+0], %r91;
$Lt_0_41218:
    add.s32 %r84, %r84, 1;
    add.u64 %rd20, %rd20, 4;
    setp.ne.s32 %p16, %r4, %r84;
    @%p16 bra $Lt_0_40962;

    // The three join blocks below are identical: %r93/%r94 become the
    // loop-carried copies of the two running positions.
    mov.s32 %r93, %r75;
    mov.s32 %r94, %r76;
    bra.uni $Lt_0_39938;
$Lt_0_53250:
    mov.s32 %r93, %r75;
    mov.s32 %r94, %r76;
    bra.uni $Lt_0_39938;
$Lt_0_40194:
    mov.s32 %r93, %r75;
    mov.s32 %r94, %r76;
$Lt_0_39938:
    mov.s32 %r6, 0;                         // item is now in progress
    bra.uni $Lt_0_38914;

$Lt_0_39170:
    // Resume path: reload the two 8-bit state values kept in %rh41 / %rh40.
    cvt.s32.s8 %r12, %rh41;
    cvt.s32.s8 %r8, %rh40;

$Lt_0_38914:
    // %r99 = 0 only when %r8 == 0 AND steps[%r73 + 1] == -1024; otherwise 1.
    .loc 16 546 0
    mov.u32 %r95, 0;
    setp.ne.s32 %p17, %r8, %r95;
    @%p17 bra $L_0_38658;
    mov.s32 %r96, %r73;
    cvt.u64.s32 %rd25, %r96;
    mul.lo.u64 %rd26, %rd25, 4;
    add.u64 %rd27, %rd4, %rd26;
    ld.local.s32 %r97, [%rd27+4];
    mov.u32 %r98, -1024;
    setp.eq.s32 %p18, %r97, %r98;
    @%p18 bra $L_0_38402;
$L_0_38658:
    mov.s32 %r99, 1;
    bra.uni $L_0_38146;
$L_0_38402:
    mov.s32 %r99, 0;
$L_0_38146:
    mov.s32 %r100, %r99;                    // %r100 = "try a step" flag for this pass
    // Take the block below only when %rh43 == %r12 AND %r99 == 0.
    // NOTE(review): %rh43 has no visible write before its first read here.
    cvt.s32.s8 %r101, %rh43;
    set.eq.u32.s32 %r102, %r101, %r12;
    neg.s32 %r103, %r102;
    cvt.s8.s32 %r104, %r99;
    mov.s32 %r105, 0;
    set.eq.u32.s32 %r106, %r104, %r105;
    neg.s32 %r107, %r106;
    and.b32 %r108, %r103, %r107;
    mov.u32 %r109, 0;
    setp.eq.s32 %p19, %r108, %r109;
    @%p19 bra $Lt_0_43522;
    cvt.s32.s8 %r110, %rh38;
    mov.u32 %r111, 1;
    setp.ne.s32 %p20, %r110, %r111;
    @%p20 bra $Lt_0_42754;                  // %rh38 != 1

    // %rh38 == 1: switch %rh38 to 2, logically invert %rh39, recompute %r1.
    .loc 16 564 0
    mov.s16 %rh44, 2;
    mov.s16 %rh38, %rh44;
    .loc 16 565 0
    cvt.s32.s8 %r112, %rh39;
    mov.s32 %r113, 0;
    set.eq.u32.s32 %r114, %r112, %r113;
    neg.s32 %r115, %r114;
    cvt.s8.s32 %rh39, %r115;                // %rh39 = (%rh39 == 0) ? 1 : 0
    .loc 16 566 0
    mov.s32 %r8, 1;
    cvt.s8.s32 %rh40, %r8;
    // %r1 = (%rh35 + %r12) / 2, signed division rounding toward zero.
    .loc 16 567 0
    cvt.s32.s8 %r116, %rh35;
    add.s32 %r117, %r116, %r12;
    shr.s32 %r118, %r117, 31;
    mov.s32 %r119, 1;
    and.b32 %r120, %r118, %r119;
    add.s32 %r121, %r120, %r117;
    shr.s32 %r122, %r121, 1;
    mov.s32 %r1, %r122;
    setp.ne.s32 %p21, %r122, %r12;
    @%p21 bra $Lt_0_43266;
    .loc 16 572 0
    mov.s16 %rh45, 1;
    mov.s16 %rh37, %rh45;
    mov.s32 %r123, 1;
    bra.uni $Lt_0_43010;
$Lt_0_43266:
    mov.s32 %r123, 0;
$Lt_0_43010:
    mov.s32 %r100, 1;
    bra.uni $Lt_0_42498;
$Lt_0_42754:
    mov.s32 %r123, 1;
$Lt_0_42498:
    mov.u32 %r124, 0;
    setp.eq.s32 %p22, %r123, %r124;
    @%p22 bra $Lt_0_43522;

    // Fold the two 16-bit counters into the accumulator and reset them:
    //     %rd16 += (s32)(%rh36 * %rh37)
    .loc 16 582 0
    mov.s16 %rh46, 0;
    mov.s16 %rh38, %rh46;
    // NOTE(review): %rh48 is written only at .loc 686.
    .loc 16 583 0
    mov.s16 %rh47, %rh48;
    mov.s16 %rh39, %rh47;
    .loc 16 587 0
    mov.s16 %rh49, %rh36;
    mov.s16 %rh50, %rh37;
    mul.wide.s16 %r125, %rh49, %rh50;
    cvt.s64.s32 %rd28, %r125;
    mov.s64 %rd29, %rd16;
    add.u64 %rd30, %rd28, %rd29;
    mov.s64 %rd16, %rd30;
    .loc 16 593 0
    cvt.s32.s8 %r1, %rh35;
    .loc 16 594 0
    mov.s16 %rh51, 0;
    mov.s16 %rh43, %rh51;
    .loc 16 600 0
    mov.s16 %rh52, 0;
    mov.s16 %rh37, %rh52;
    mov.s16 %rh53, 0;
    mov.s16 %rh36, %rh53;
    mov.s32 %r100, 0;

$Lt_0_43522:
$Lt_0_41986:
    mov.u32 %r126, 0;
    setp.eq.s32 %p23, %r100, %r126;
    @%p23 bra $Lt_0_44290;                  // %r100 == 0: no step attempt this pass

    // ---- Choose the next value for steps[%r73 + 1] ----------------------
    //   %r8 != 0 : the value is 1.
    //   %r8 == 0 : negate the stored value; if the result is > 0, multiply
    //              it by 32.
    // NOTE(review): together with the -1024 test at .loc 546 this presumably
    // walks the sequence 1, -1, 32, -32, 1024, -1024 -- confirm in source.
    .loc 16 609 0
    mov.s32 %r68, %r73;
    cvt.u64.s32 %rd31, %r68;
    mul.lo.u64 %rd32, %rd31, 4;
    add.u64 %rd33, %rd4, %rd32;
    ld.local.s32 %r127, [%rd33+4];
    mov.u32 %r128, 0;
    setp.eq.s32 %p24, %r8, %r128;
    @%p24 bra $Lt_0_44802;
    mov.s32 %r129, 1;
    bra.uni $Lt_0_45058;
$Lt_0_44802:
    .loc 16 260 0
    neg.s32 %r129, %r127;
    mov.u32 %r130, 0;
    setp.le.s32 %p25, %r129, %r130;
    @%p25 bra $Lt_0_45058;
    .loc 16 263 0
    mov.s32 %r131, 32;
    mul24.lo.s32 %r129, %r129, %r131;
$Lt_0_45058:
$Lt_0_44546:
    .loc 16 609 0
    st.local.s32 [%rd33+4], %r129;          // steps[%r73 + 1] = chosen value
    .loc 16 612 0
    mov.s32 %r8, 0;
    // %r135 = candidate position: %r93 + step when %rh39 != 0, else %r94 + step.
    .loc 16 609 0
    cvt.s32.s8 %r10, %rh39;
    mov.s32 %r132, 0;
    setp.ne.s32 %p26, %r10, %r132;
    mov.s32 %r75, %r93;
    add.s32 %r133, %r129, %r75;
    mov.s32 %r76, %r94;
    add.s32 %r134, %r129, %r76;
    selp.s32 %r135, %r133, %r134, %p26;
    // Probe sites[] for the candidate.  %p27 stays set when the probe ended
    // on an empty (zero) slot, i.e. the candidate was not found first.
    .loc 16 70 0
    mov.s32 %r77, 0;
$Lt_0_33026:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd34, %r77;
    mul.lo.u64 %rd35, %rd34, 2;
    add.u64 %rd19, %rd5, %rd35;
    ld.local.s16 %r78, [%rd19+0];
    mov.s32 %r136, 0;
    setp.eq.s32 %p27, %r78, %r136;
    @%p27 bra $Lt_0_54786;
    .loc 16 73 0
    cvt.s16.s32 %r137, %r135;
    setp.eq.s32 %p28, %r78, %r137;
    @%p28 bra $Lt_0_54786;
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_33026;
$Lt_0_54786:
$Lt_0_1282:
    .loc 16 624 0
    @!%p27 bra $Lt_0_45826;                 // candidate already in the table: reject
    @!%p26 bra $Lt_0_46338;
    // Accept the step: commit the candidate to the position that was advanced.
    .loc 16 635 0
    mov.s32 %r75, %r135;
    mov.s32 %r93, %r75;
    bra.uni $Lt_0_46082;
$Lt_0_46338:
    .loc 16 636 0
    mov.s32 %r76, %r135;
    mov.s32 %r94, %r76;
$Lt_0_46082:
    .loc 16 639 0
    add.s32 %r138, %r12, 1;
    cvt.s8.s32 %r12, %r138;                 // %r12 += 1, kept in signed 8-bit range
    cvt.s8.s32 %rh41, %r12;
    cvt.s32.s8 %r11, %rh38;
    mov.s32 %r139, 0;
    setp.eq.s32 %p29, %r11, %r139;          // %p29 = (%rh38 == 0)
    @!%p29 bra $Lt_0_46594;
    .loc 16 640 0
    mov.s32 %r140, 0;
    set.eq.u32.s32 %r141, %r10, %r140;
    neg.s32 %r10, %r141;                    // %r10 = (%r10 == 0) ? 1 : 0
    cvt.s8.s32 %rh39, %r10;
$Lt_0_46594:
    .loc 16 641 0
    add.s32 %r142, %r68, 1;
    mov.s32 %r73, %r142;                    // advance the steps[] index
    .loc 16 647 0
    mov.s32 %r8, 1;
    cvt.s8.s32 %rh40, %r8;
    .loc 16 648 0
    cvt.s16.s32 %r143, %r135;
    st.local.s16 [%rd19+0], %r143;          // record the new position in the empty slot
    // Run the distance test only when %rh38 == 0 AND %r1 != %r12.
    selp.s32 %r144, 1, 0, %p29;
    set.ne.u32.s32 %r145, %r1, %r12;
    neg.s32 %r146, %r145;
    and.b32 %r147, %r144, %r146;
    mov.u32 %r148, 0;
    setp.eq.s32 %p30, %r147, %r148;
    @%p30 bra $Lt_0_47618;

    // ---- Component-wise distance between %r76 and %r75 ------------------
    //   %r155 = %r76 % 1024    %r162 = %r75 % 1024    (signed remainder)
    //   %r166 = |(%r76 >> 10) - (%r75 >> 10)|
    //   %r170 = |(%r155 >> 5) - (%r162 >> 5)|
    //   %r186 = |(%r155 % 32) - (%r162 % 32)|
    //   %r188 = %r166 + %r170 + %r186
    .loc 16 663 0
    shr.s32 %r149, %r76, 31;
    mov.s32 %r150, 1023;
    and.b32 %r151, %r149, %r150;
    add.s32 %r152, %r151, %r76;
    shr.s32 %r153, %r152, 10;
    mul.lo.s32 %r154, %r153, 1024;
    sub.s32 %r155, %r76, %r154;
    shr.s32 %r156, %r75, 31;
    mov.s32 %r157, 1023;
    and.b32 %r158, %r156, %r157;
    add.s32 %r159, %r158, %r75;
    shr.s32 %r160, %r159, 10;
    mul.lo.s32 %r161, %r160, 1024;
    sub.s32 %r162, %r75, %r161;
    shr.s32 %r163, %r76, 10;
    shr.s32 %r164, %r75, 10;
    sub.s32 %r165, %r163, %r164;
    abs.s32 %r166, %r165;
    shr.s32 %r167, %r155, 5;
    shr.s32 %r168, %r162, 5;
    sub.s32 %r169, %r167, %r168;
    abs.s32 %r170, %r169;
    shr.s32 %r171, %r155, 31;
    mov.s32 %r172, 31;
    and.b32 %r173, %r171, %r172;
    add.s32 %r174, %r173, %r155;
    shr.s32 %r175, %r174, 5;
    mul.lo.s32 %r176, %r175, 32;
    sub.s32 %r177, %r155, %r176;
    shr.s32 %r178, %r162, 31;
    mov.s32 %r179, 31;
    and.b32 %r180, %r178, %r179;
    add.s32 %r181, %r180, %r162;
    shr.s32 %r182, %r181, 5;
    mul.lo.s32 %r183, %r182, 32;
    sub.s32 %r184, %r162, %r183;
    sub.s32 %r185, %r177, %r184;
    abs.s32 %r186, %r185;
    add.s32 %r187, %r170, %r186;
    add.s32 %r188, %r166, %r187;
    sub.s32 %r189, %r1, %r12;
    setp.le.s32 %p31, %r188, %r189;
    @%p31 bra $Lt_0_47618;                  // distance <= %r1 - %r12: no change

    // Distance exceeds %r1 - %r12: save %r10 and %r12, switch %rh38 to 1,
    // clear %rh36 and recompute %r1.
    .loc 16 686 0
    cvt.s8.s32 %rh48, %r10;
    .loc 16 687 0
    mov.s32 %r11, 1;
    cvt.s8.s32 %rh38, %r11;
    .loc 16 690 0
    cvt.s8.s32 %rh43, %r12;
    .loc 16 691 0
    mov.s16 %rh54, 0;
    mov.s16 %rh36, %rh54;
    // %r1 = (%rh35 + %r12) / 2 + (%rh35 + %r12) % 2   (signed / and %)
    .loc 16 692 0
    cvt.s32.s8 %r190, %rh35;
    add.s32 %r191, %r190, %r12;
    shr.s32 %r192, %r191, 31;
    mov.s32 %r193, 1;
    and.b32 %r194, %r192, %r193;
    add.s32 %r195, %r194, %r191;
    shr.s32 %r196, %r195, 1;
    shr.s32 %r197, %r191, 31;
    mov.s32 %r198, 1;
    and.b32 %r199, %r197, %r198;
    add.s32 %r200, %r199, %r191;
    shr.s32 %r201, %r200, 1;
    mul.lo.s32 %r202, %r201, 2;
    sub.s32 %r203, %r191, %r202;
    add.s32 %r1, %r196, %r203;

$Lt_0_47618:
$Lt_0_47106:
    // When %r12 has reached %r1, bump one counter selected by %r11:
    //   %r11 == 0 -> %rd16 += 1,  %r11 == 1 -> %rh36 += 1,  otherwise %rh37 += 1.
    setp.eq.s32 %p32, %r1, %r12;
    @!%p32 bra $Lt_0_44034;
    mov.u32 %r204, 0;
    setp.ne.s32 %p33, %r11, %r204;
    @%p33 bra $Lt_0_48898;
    .loc 16 709 0
    mov.s64 %rd36, %rd16;
    add.u64 %rd37, %rd36, 1;
    mov.s64 %rd16, %rd37;
    bra.uni $Lt_0_44034;
$Lt_0_48898:
    mov.u32 %r205, 1;
    setp.ne.s32 %p34, %r11, %r205;
    @%p34 bra $Lt_0_49410;
    .loc 16 713 0
    mov.s16 %rh55, %rh36;
    add.s16 %rh56, %rh55, 1;
    mov.s16 %rh36, %rh56;
    bra.uni $Lt_0_44034;
$Lt_0_49410:
    .loc 16 714 0
    mov.s16 %rh57, %rh37;
    add.s16 %rh58, %rh57, 1;
    mov.s16 %rh37, %rh58;
    bra.uni $Lt_0_44034;

$Lt_0_45826:
    // Rejected candidate: persist %r8 (0 at this point) and recompute %p32.
    cvt.s8.s32 %rh40, %r8;
    setp.eq.s32 %p32, %r1, %r12;
    bra.uni $Lt_0_44034;
$Lt_0_44290:
    setp.eq.s32 %p32, %r1, %r12;

$Lt_0_44034:
    // Step back when %r1 == %r12 OR no step was attempted (%r100 == 0).
    selp.s32 %r206, 1, 0, %p32;
    mov.s32 %r207, 0;
    set.eq.u32.s32 %r208, %r100, %r207;
    neg.s32 %r209, %r208;
    or.b32 %r210, %r206, %r209;
    mov.u32 %r211, 0;
    setp.eq.s32 %p35, %r210, %r211;
    @%p35 bra $Lt_0_49666;

    // ---- Undo the most recent step --------------------------------------
    .loc 16 733 0
    sub.s32 %r212, %r12, 1;
    cvt.s8.s32 %r12, %r212;                 // %r12 -= 1, kept in signed 8-bit range
    cvt.s8.s32 %rh41, %r12;
    cvt.s32.s8 %r10, %rh39;
    cvt.s32.s8 %r213, %rh38;
    mov.u32 %r214, 0;
    setp.ne.s32 %p36, %r213, %r214;
    @%p36 bra $Lt_0_50178;
    .loc 16 734 0
    mov.s32 %r215, 0;
    set.eq.u32.s32 %r216, %r10, %r215;
    neg.s32 %r10, %r216;                    // %rh38 == 0: invert %r10 again
    cvt.s8.s32 %rh39, %r10;
$Lt_0_50178:
    mov.s32 %r68, %r73;
    cvt.u64.s32 %rd38, %r68;
    mul.lo.u64 %rd39, %rd38, 4;
    add.u64 %rd40, %rd4, %rd39;
    ld.local.s32 %r217, [%rd40+0];          // %r217 = steps[%r73], the step being undone
    mov.u32 %r218, 0;
    setp.eq.s32 %p37, %r10, %r218;
    @%p37 bra $Lt_0_50946;

    // %r10 != 0: locate the first position (%r93) in sites[], then retreat it.
    .loc 16 70 0
    mov.s32 %r75, %r93;
    mov.s32 %r77, 0;
$Lt_0_36098:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd41, %r77;
    mul.lo.u64 %rd42, %rd41, 2;
    add.u64 %rd19, %rd5, %rd42;
    ld.local.s16 %r78, [%rd19+0];
    mov.u32 %r219, 0;
    setp.eq.s32 %p38, %r78, %r219;
    @%p38 bra $Lt_0_55298;
    .loc 16 73 0
    cvt.s16.s32 %r220, %r75;
    setp.eq.s32 %p39, %r78, %r220;
    @%p39 bra $Lt_0_55298;
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_36098;
$Lt_0_55298:
$Lt_0_1026:
    .loc 16 742 0
    mov.s32 %r84, %r77;                     // %r84 = slot index to clear
    .loc 16 743 0
    sub.s32 %r221, %r75, %r217;
    mov.s32 %r93, %r221;
    bra.uni $Lt_0_50690;

$Lt_0_50946:
    // %r10 == 0: same as above for the second position (%r94).
    .loc 16 70 0
    mov.s32 %r76, %r94;
    mov.s32 %r77, 0;
$Lt_0_36354:
    // Loop body line 72
    .loc 16 72 0
    cvt.u64.s32 %rd43, %r77;
    mul.lo.u64 %rd44, %rd43, 2;
    add.u64 %rd19, %rd5, %rd44;
    ld.local.s16 %r78, [%rd19+0];
    mov.u32 %r222, 0;
    setp.eq.s32 %p40, %r78, %r222;
    @%p40 bra $Lt_0_55810;
    .loc 16 73 0
    cvt.s16.s32 %r223, %r76;
    setp.eq.s32 %p41, %r78, %r223;
    @%p41 bra $Lt_0_55810;
    .loc 16 70 0
    add.s32 %r77, %r77, 1;
    bra.uni $Lt_0_36354;
$Lt_0_55810:
$Lt_0_770:
    .loc 16 745 0
    mov.s32 %r84, %r77;
    .loc 16 746 0
    sub.s32 %r224, %r76, %r217;
    mov.s32 %r94, %r224;

$Lt_0_50690:
    .loc 16 749 0
    mov.s16 %rh59, 0;
    cvt.u64.s32 %rd45, %r84;
    mul.lo.u64 %rd46, %rd45, 2;
    add.u64 %rd47, %rd5, %rd46;
    st.local.s16 [%rd47+0], %rh59;          // sites[%r84] = 0
    .loc 16 750 0
    mov.s32 %r8, 0;
    cvt.s8.s32 %rh40, %r8;
    .loc 16 760 0
    sub.s32 %r225, %r68, 1;
    mov.s32 %r73, %r225;                    // move the steps[] index back by one

$Lt_0_49666:
    // The item is finished when %r8 == 0 AND %r4 > %r12; otherwise loop again.
    mov.s32 %r226, 0;
    set.eq.u32.s32 %r227, %r8, %r226;
    neg.s32 %r228, %r227;
    set.gt.u32.s32 %r229, %r4, %r12;
    neg.s32 %r230, %r229;
    and.b32 %r231, %r228, %r230;
    mov.u32 %r232, 0;
    setp.eq.s32 %p42, %r231, %r232;
    @%p42 bra $Lt_0_26370;

    // ---- Item finished: store the accumulator and claim another index ---
    .loc 16 777 0
    mov.s64 %rd48, %rd16;
    mov.s32 %r233, %r74;
    cvt.u64.s32 %rd49, %r233;
    mul.lo.u64 %rd50, %rd49, 8;
    add.u64 %rd51, %rd1, %rd50;
    st.global.u64 [%rd51+0], %rd48;         // u64 result array at %rd1, slot %r74
    mov.s32 %r6, 1;
    bra.uni $Lt_0_26370;

$Lt_0_514:
    .loc 16 815 0
    exit;
$LDWend__Z10saw_trimerv:
} // _Z10saw_trimerv