ELF3NJ##@8@.shstrtab.strtab.symtab.text.main_$ck_L29_2.nv.info.main_$ck_L29_2.nv.shared.main_$ck_L29_2.nv.info.nv.constant0.main_$ck_L29_2.nv_debug_line_sass.rel.nv_debug_line_sass.nv_debug_line_ptx.rel.nv_debug_line_ptx.debug_line.rel.debug_line.nv_debug_ptx_txt.shstrtab.strtab.symtabmain_$ck_L29_2.text.main_$ck_L29_2.nv.info.main_$ck_L29_2.nv.shared.main_$ck_L29_2.nv.inforeduc_share_ctl_c2__f1s125__17$$_redfold_t38__f1s221__18.nv.constant0.main_$ck_L29_2_param.nv_debug_line_sass.rel.nv_debug_line_sass.nv_debug_line_ptx.rel.nv_debug_line_ptx.debug_line.rel.debug_line.nv_debug_ptx_txt*W .Jd- vecAdd_reduction.f90  3~ .version 3.1.target sm_35.address_size 64.entry main_$ck_L29_2(.param .s64 $$arg_ptr_acc_b_t6_t55,.param .s64 $$arg_ptr_acc_a_t8_t56,.param .s64 $$arg_ptr_acc_c_t10_t57,.param .s64 $$arg_ptr_reduc_ctl_t15_t58,.param .s64 $$arg_ptr_reduc_val_t18_t59,.param .s64 $$arg_ptr_accshare_t21_t60 ){.shared .s32 reduc_share_ctl_c2__f1s125;.shared .u64 $$_redfold_t38__f1s221[128];.reg .pred %p<13>;.reg .b64 %bd<10>;.reg .s32 %s<8>;.reg .s64 %sd<34>;.reg .u32 %u<20>;.reg .f64 %fd<29>;mov.u32 %u7, %ntid.x; // vecAdd_reduction.f90:29mov.u32 %u8, %ctaid.x; // vecAdd_reduction.f90:29mov.u32 %u9, %tid.x; // vecAdd_reduction.f90:29mad.lo.u32 %u10, %u7, %u8, %u9; // vecAdd_reduction.f90:29cvt.s64.u32 %sd1, %u10; // vecAdd_reduction.f90:29 : $$id_t14mov.f64 %fd1, 0d0000000000000000; // vecAdd_reduction.f90:29 : $$_sum_t12setp.ge.s64 %p1, %sd1, 100000; // vecAdd_reduction.f90:29@%p1 bra $main_$ck_L29_2__l8__; // vecAdd_reduction.f90:29shl.b64 %bd1, %sd1, 3; // vecAdd_reduction.f90:30ld.param.s64 %sd2, [$$arg_ptr_acc_b_t6_t55];// vecAdd_reduction.f90:30 : $$arg_ptr_acc_b_t6_t55add.s64 %sd3, %sd2, %bd1; // vecAdd_reduction.f90:30ld.param.s64 %sd4, [$$arg_ptr_acc_a_t8_t56];// vecAdd_reduction.f90:30 : $$arg_ptr_acc_a_t8_t56add.s64 %sd5, %sd4, %bd1; // vecAdd_reduction.f90:30ld.global.nc.f64 %fd7, [%sd5]; // vecAdd_reduction.f90:30 : 0[$$arg_ptr_acc_a_t8_t56,$$id_t14,ex].Lld.global.nc.f64 %fd8, [%sd3]; // vecAdd_reduction.f90:30 : 0[$$arg_ptr_acc_b_t6_t55,$$id_t14,ex].Ladd.rn.f64 %fd9, %fd7, %fd8; // vecAdd_reduction.f90:30ld.param.s64 %sd6, [$$arg_ptr_acc_c_t10_t57];// vecAdd_reduction.f90:30 : $$arg_ptr_acc_c_t10_t57add.s64 %sd7, %sd6, %bd1; // vecAdd_reduction.f90:30st.global.f64 [%sd7], %fd9; // vecAdd_reduction.f90:30 : 0[$$arg_ptr_acc_c_t10_t57,$$id_t14].Lmov.f64 %fd1, %fd9; // vecAdd_reduction.f90:31 : $$_sum_t12$main_$ck_L29_2__l8__:mov.u32 %u11, %tid.x; // vecAdd_reduction.f90:32mov.u32 %u1, %tid.x; // vecAdd_reduction.f90:32 : $$lcs_9_t49cvt.s64.u32 %sd8, %tid.x; // vecAdd_reduction.f90:32shl.b64 %bd2, %sd8, 3; // vecAdd_reduction.f90:32mov.s64 %sd9, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd10, %bd2, %sd9; // vecAdd_reduction.f90:32st.volatile.shared.f64 [%sd10], %fd1; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_9_t49].Lbar.sync 0; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd2, [%sd10]; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39setp.ge.u32 %p2, %u11, 64; // vecAdd_reduction.f90:32@%p2 bra $main_$ck_L29_2__l11__; // vecAdd_reduction.f90:32cvt.s64.u32 %sd11, %u1; // vecAdd_reduction.f90:32shl.b64 %bd3, %sd11, 3; // vecAdd_reduction.f90:32mov.s64 %sd12, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd13, %bd3, %sd12; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd10, [%sd13 + 512]; // vecAdd_reduction.f90:32 : 64[ _&$$_redfold_t38,$$lcs_9_t49].Ladd.rn.f64 %fd2, %fd2, %fd10; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd13], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_9_t49].L$main_$ck_L29_2__l11__:bar.sync 0; // vecAdd_reduction.f90:32mov.u32 %u12, %tid.x; // vecAdd_reduction.f90:32mov.u32 %u2, %tid.x; // vecAdd_reduction.f90:32 : $$lcs_10_t50setp.ge.u32 %p3, %u12, 32; // vecAdd_reduction.f90:32@%p3 bra $main_$ck_L29_2__l14__; // vecAdd_reduction.f90:32cvt.s64.u32 %sd14, %u2; // vecAdd_reduction.f90:32shl.b64 %bd4, %sd14, 3; // vecAdd_reduction.f90:32mov.s64 %sd15, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd16, %bd4, %sd15; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd11, [%sd16 + 256]; // vecAdd_reduction.f90:32 : 32[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd2, %fd2, %fd11; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd16], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lld.volatile.shared.f64 %fd12, [%sd16 + 128]; // vecAdd_reduction.f90:32 : 16[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd2, %fd2, %fd12; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd16], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lld.volatile.shared.f64 %fd13, [%sd16 + 64]; // vecAdd_reduction.f90:32 : 8[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd2, %fd2, %fd13; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd16], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lld.volatile.shared.f64 %fd14, [%sd16 + 32]; // vecAdd_reduction.f90:32 : 4[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd2, %fd2, %fd14; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd16], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lld.volatile.shared.f64 %fd15, [%sd16 + 16]; // vecAdd_reduction.f90:32 : 2[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd2, %fd2, %fd15; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39st.volatile.shared.f64 [%sd16], %fd2; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lld.volatile.shared.f64 %fd16, [%sd16 + 8]; // vecAdd_reduction.f90:32 : 1[ _&$$_redfold_t38,$$lcs_10_t50].Ladd.rn.f64 %fd17, %fd2, %fd16; // vecAdd_reduction.f90:32st.volatile.shared.f64 [%sd16], %fd17; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_10_t50].Lmov.f64 %fd2, %fd17; // vecAdd_reduction.f90:32 : $redfold_left_temp_t39$main_$ck_L29_2__l14__:bar.sync 0; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd4, [$$_redfold_t38__f1s221];// vecAdd_reduction.f90:32 : $$reduc_pvt_t17mov.f64 %fd3, %fd4; // vecAdd_reduction.f90:32 : $redfinal_t41bar.sync 0; // vecAdd_reduction.f90:32mov.u32 %u13, %tid.x; // vecAdd_reduction.f90:32setp.ne.u32 %p4, %u13, 0; // vecAdd_reduction.f90:32@%p4 bra $main_$ck_L29_2__l17__; // vecAdd_reduction.f90:32cvt.s64.u32 %sd17, %ctaid.x; // vecAdd_reduction.f90:32shl.b64 %bd5, %sd17, 3; // vecAdd_reduction.f90:32ld.param.s64 %sd18, [$$arg_ptr_reduc_val_t18_t59];// vecAdd_reduction.f90:32 : $$arg_ptr_reduc_val_t18_t59add.s64 %sd19, %sd18, %bd5; // vecAdd_reduction.f90:32st.global.f64 [%sd19], %fd3; // vecAdd_reduction.f90:32 : 0[$$arg_ptr_reduc_val_t18_t59,_acc_read_hw_reg(%ctaid.x)].L$main_$ck_L29_2__l17__:membar.gl; // vecAdd_reduction.f90:32mov.u32 %u14, %tid.x; // vecAdd_reduction.f90:32setp.ne.u32 %p5, %u14, 0; // vecAdd_reduction.f90:32@%p5 bra $main_$ck_L29_2__l20__; // vecAdd_reduction.f90:32cvt.s32.u32 %s2, %nctaid.x; // vecAdd_reduction.f90:32sub.s32 %s3, 1, %s2; // vecAdd_reduction.f90:32mov.u32 %u15, %ctaid.x; // vecAdd_reduction.f90:32setp.eq.u32 %p6, %u15, 0; // vecAdd_reduction.f90:32selp.s32 %s4, %s3, 1, %p6; // vecAdd_reduction.f90:32ld.param.s64 %sd20, [$$arg_ptr_reduc_ctl_t15_t58];// vecAdd_reduction.f90:32 : $$arg_ptr_reduc_ctl_t15_t58atom.global.add.s32 %s5, [%sd20], %s4; // vecAdd_reduction.f90:32add.s32 %s6, %s5, %s4; // vecAdd_reduction.f90:32st.volatile.shared.s32 [reduc_share_ctl_c2__f1s125], %s6;// vecAdd_reduction.f90:32 : reduc_share_ctl_c2$main_$ck_L29_2__l20__:bar.sync 0; // vecAdd_reduction.f90:32ld.volatile.shared.s32 %s7, [reduc_share_ctl_c2__f1s125];// vecAdd_reduction.f90:32 : reduc_share_ctl_c2setp.ne.s32 %p7, %s7, 0; // vecAdd_reduction.f90:32@%p7 bra $main_$ck_L29_2__l102__; // vecAdd_reduction.f90:32mov.f64 %fd4, 0d0000000000000000; // vecAdd_reduction.f90:32 : $$reduc_pvt_t17cvt.s32.u32 %s1, %tid.x; // vecAdd_reduction.f90:32 : $$induc_p13_t35mov.u32 %u16, %nctaid.x; // vecAdd_reduction.f90:32mov.u32 %u4, %nctaid.x; // vecAdd_reduction.f90:32 : $$lis_b2_t37setp.ge.u32 %p8, %s1, %u16; // vecAdd_reduction.f90:32@%p8 bra $main_$ck_L29_2__l31__; // vecAdd_reduction.f90:32mov.u32 %u3, %ntid.x; // vecAdd_reduction.f90:32 : $$lis_b1_t36$main_$ck_L29_2__l27__:cvt.s64.s32 %sd21, %s1; // vecAdd_reduction.f90:32shl.b64 %bd6, %sd21, 3; // vecAdd_reduction.f90:32ld.param.s64 %sd22, [$$arg_ptr_reduc_val_t18_t59];// vecAdd_reduction.f90:32 : $$arg_ptr_reduc_val_t18_t59add.s64 %sd23, %sd22, %bd6; // vecAdd_reduction.f90:32ld.global.f64 %fd18, [%sd23]; // vecAdd_reduction.f90:32 : 0[$$arg_ptr_reduc_val_t18_t59,$$induc_p13_t35].Ladd.rn.f64 %fd4, %fd18, %fd4; // vecAdd_reduction.f90:32 : $$reduc_pvt_t17add.u32 %s1, %s1, %u3; // vecAdd_reduction.f90:32 : $$induc_p13_t35setp.lt.u32 %p9, %s1, %u4; // vecAdd_reduction.f90:32@%p9 bra $main_$ck_L29_2__l27__; // vecAdd_reduction.f90:32$main_$ck_L29_2__l31__:mov.u32 %u17, %tid.x; // vecAdd_reduction.f90:32mov.u32 %u5, %tid.x; // vecAdd_reduction.f90:32 : $$lcs_13_t53cvt.s64.u32 %sd24, %tid.x; // vecAdd_reduction.f90:32shl.b64 %bd7, %sd24, 3; // vecAdd_reduction.f90:32mov.s64 %sd25, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd26, %bd7, %sd25; // vecAdd_reduction.f90:32st.volatile.shared.f64 [%sd26], %fd4; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_13_t53].Lbar.sync 0; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd5, [%sd26]; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42setp.ge.u32 %p10, %u17, 64; // vecAdd_reduction.f90:32@%p10 bra $main_$ck_L29_2__l34__; // vecAdd_reduction.f90:32cvt.s64.u32 %sd27, %u5; // vecAdd_reduction.f90:32shl.b64 %bd8, %sd27, 3; // vecAdd_reduction.f90:32mov.s64 %sd28, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd29, %bd8, %sd28; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd19, [%sd29 + 512]; // vecAdd_reduction.f90:32 : 64[ _&$$_redfold_t38,$$lcs_13_t53].Ladd.rn.f64 %fd5, %fd5, %fd19; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd29], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_13_t53].L$main_$ck_L29_2__l34__:bar.sync 0; // vecAdd_reduction.f90:32mov.u32 %u18, %tid.x; // vecAdd_reduction.f90:32mov.u32 %u6, %tid.x; // vecAdd_reduction.f90:32 : $$lcs_14_t54setp.ge.u32 %p11, %u18, 32; // vecAdd_reduction.f90:32@%p11 bra $main_$ck_L29_2__l37__; // vecAdd_reduction.f90:32cvt.s64.u32 %sd30, %u6; // vecAdd_reduction.f90:32shl.b64 %bd9, %sd30, 3; // vecAdd_reduction.f90:32mov.s64 %sd31, $$_redfold_t38__f1s221;// vecAdd_reduction.f90:32add.s64 %sd32, %bd9, %sd31; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd20, [%sd32 + 256]; // vecAdd_reduction.f90:32 : 32[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd5, %fd5, %fd20; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd32], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lld.volatile.shared.f64 %fd21, [%sd32 + 128]; // vecAdd_reduction.f90:32 : 16[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd5, %fd5, %fd21; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd32], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lld.volatile.shared.f64 %fd22, [%sd32 + 64]; // vecAdd_reduction.f90:32 : 8[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd5, %fd5, %fd22; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd32], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lld.volatile.shared.f64 %fd23, [%sd32 + 32]; // vecAdd_reduction.f90:32 : 4[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd5, %fd5, %fd23; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd32], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lld.volatile.shared.f64 %fd24, [%sd32 + 16]; // vecAdd_reduction.f90:32 : 2[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd5, %fd5, %fd24; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42st.volatile.shared.f64 [%sd32], %fd5; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lld.volatile.shared.f64 %fd25, [%sd32 + 8]; // vecAdd_reduction.f90:32 : 1[ _&$$_redfold_t38,$$lcs_14_t54].Ladd.rn.f64 %fd26, %fd5, %fd25; // vecAdd_reduction.f90:32st.volatile.shared.f64 [%sd32], %fd26; // vecAdd_reduction.f90:32 : 0[ _&$$_redfold_t38,$$lcs_14_t54].Lmov.f64 %fd5, %fd26; // vecAdd_reduction.f90:32 : $redfold_left_temp_t42$main_$ck_L29_2__l37__:bar.sync 0; // vecAdd_reduction.f90:32ld.volatile.shared.f64 %fd4, [$$_redfold_t38__f1s221];// vecAdd_reduction.f90:32 : $$reduc_pvt_t17mov.f64 %fd6, %fd4; // vecAdd_reduction.f90:32 : $redfinal_t44bar.sync 0; // vecAdd_reduction.f90:32mov.u32 %u19, %tid.x; // vecAdd_reduction.f90:32setp.ne.u32 %p12, %u19, 0; // vecAdd_reduction.f90:32@%p12 bra $main_$ck_L29_2__l102__; // vecAdd_reduction.f90:32ld.param.s64 %sd33, [$$arg_ptr_accshare_t21_t60];// vecAdd_reduction.f90:32 : $$arg_ptr_accshare_t21_t60ld.global.f64 %fd27, [%sd33]; // vecAdd_reduction.f90:32 : 0[$$arg_ptr_accshare_t21_t60,0].Ladd.rn.f64 %fd28, %fd27, %fd6; // vecAdd_reduction.f90:32st.global.f64 [%sd33], %fd28; // vecAdd_reduction.f90:32 : 0[$$arg_ptr_accshare_t21_t60,0].L$main_$ck_L29_2__l102__:exit;} // main_$ck_L29_2  2k|}}񀀁~  ~{}}    0V- vecAdd_reduction.f90  @00 (!  ! ! ! ! !::bphBBRCZ0pC @D PD `DkpDpE HJNpDppJ