.version 3.1
.target sm_35
.address_size 64
// Cray Fortran : Version 8.2.2 (u82103f82232i82242p82428a82022z82428)
.file 1 "vecAdd_reduction.f90"

//-----------------------------------------------------------------------------
// OpenACC kernel generated from vecAdd_reduction.f90 lines 29-32.
//
// Per thread: computes one element  c(i) = a(i) + b(i)  for global index
// i = ctaid.x*ntid.x + tid.x, guarded by i < 100000, and keeps the computed
// value as this thread's contribution to a sum reduction (0.0 if out of range).
//
// Reduction protocol (two stages):
//   1. Each block folds its (up to) 128 per-thread values in the shared array
//      $$_redfold_t38 via a tree: threads < 64 add element [tid+64], then
//      threads < 32 do an unrolled fold of strides 32,16,8,4,2,1 on volatile
//      shared memory. Thread 0 stores the block partial to reduc_val[ctaid.x].
//   2. After membar.gl, thread 0 of every block atomically bumps the global
//      counter reduc_ctl: block 0 adds (1 - nctaid.x), other blocks add 1.
//      The counter reaches 0 exactly when all blocks have deposited their
//      partial; the one block that observes 0 re-loads all partials from
//      reduc_val (grid of nctaid.x values, strided by ntid.x per thread),
//      repeats the shared-memory tree fold, and its thread 0 accumulates the
//      grand total into *accshare (read-modify-write, no atomic needed since
//      exactly one thread survives to this point).
//
// NOTE(review): floating-point sums use add.rn.f64; result ordering differs
// from a serial Fortran sum — presumably acceptable per OpenACC reduction
// semantics. Assumes blockDim.x <= 128 (shared fold array size) — the launch
// configuration is outside this file; confirm against the host code.
//-----------------------------------------------------------------------------
.entry main_$ck_L29_2(
	.param .s64 $$arg_ptr_acc_b_t6_t55,     // device pointer: array b
	.param .s64 $$arg_ptr_acc_a_t8_t56,     // device pointer: array a
	.param .s64 $$arg_ptr_acc_c_t10_t57,    // device pointer: array c (output)
	.param .s64 $$arg_ptr_reduc_ctl_t15_t58,// global reduction control counter
	.param .s64 $$arg_ptr_reduc_val_t18_t59,// per-block partial sums (one f64 per block)
	.param .s64 $$arg_ptr_accshare_t21_t60  // final accumulator for the reduced sum
)
{
	// Per-block scratch: copy of the control-counter result, and the
	// 128-slot f64 fold buffer used by both tree reductions below.
	.shared .s32 reduc_share_ctl_c2__f1s125;
	.shared .u64 $$_redfold_t38__f1s221[128];

	.reg .pred %p<13>;
	.reg .b64 %bd<10>;
	.reg .s32 %s<8>;
	.reg .s64 %sd<34>;
	.reg .u32 %u<20>;
	.reg .f64 %fd<29>;

	//--- global thread index and reduction seed ------------------------------
	.loc 1 29 0
	.loc 1 29 0
	mov.u32 %u7, %ntid.x;
	mov.u32 %u8, %ctaid.x;
	mov.u32 %u9, %tid.x;
	mad.lo.u32 %u10, %u7, %u8, %u9;         // flat id = ctaid*ntid + tid
	cvt.s64.u32 %sd1, %u10;                 // $$id_t14
	mov.f64 %fd1, 0d0000000000000000;       // $$_sum_t12 = 0.0 (identity for +)
	setp.ge.s64 %p1, %sd1, 100000;          // loop trip count N = 100000
	@%p1 bra $main_$ck_L29_2__l8__;         // out-of-range threads skip the body

	//--- c(i) = a(i) + b(i); keep the value for the sum reduction ------------
	.loc 1 30 0
	shl.b64 %bd1, %sd1, 3;                  // byte offset = i * sizeof(f64)
	ld.param.s64 %sd2, [$$arg_ptr_acc_b_t6_t55];
	add.s64 %sd3, %sd2, %bd1;               // &b(i)
	ld.param.s64 %sd4, [$$arg_ptr_acc_a_t8_t56];
	add.s64 %sd5, %sd4, %bd1;               // &a(i)
	ld.global.nc.f64 %fd7, [%sd5];          // a(i) (non-coherent: read-only data)
	ld.global.nc.f64 %fd8, [%sd3];          // b(i)
	add.rn.f64 %fd9, %fd7, %fd8;
	ld.param.s64 %sd6, [$$arg_ptr_acc_c_t10_t57];
	add.s64 %sd7, %sd6, %bd1;               // &c(i)
	st.global.f64 [%sd7], %fd9;
	.loc 1 31 0
	mov.f64 %fd1, %fd9;                     // contribution to the sum

//--- stage 1: intra-block tree reduction into shared memory ------------------
$main_$ck_L29_2__l8__:
	.loc 1 32 0
	mov.u32 %u11, %tid.x;
	mov.u32 %u1, %tid.x;                    // $$lcs_9_t49
	cvt.s64.u32 %sd8, %tid.x;
	shl.b64 %bd2, %sd8, 3;
	mov.s64 %sd9, $$_redfold_t38__f1s221;
	add.s64 %sd10, %bd2, %sd9;              // &redfold[tid]
	st.volatile.shared.f64 [%sd10], %fd1;   // publish this thread's value
	bar.sync 0;                             // all values visible before folding
	ld.volatile.shared.f64 %fd2, [%sd10];   // $redfold_left_temp_t39
	setp.ge.u32 %p2, %u11, 64;
	@%p2 bra $main_$ck_L29_2__l11__;
	// threads 0..63: redfold[tid] += redfold[tid + 64] (+512 bytes)
	.loc 1 32 0
	cvt.s64.u32 %sd11, %u1;
	shl.b64 %bd3, %sd11, 3;
	mov.s64 %sd12, $$_redfold_t38__f1s221;
	add.s64 %sd13, %bd3, %sd12;
	ld.volatile.shared.f64 %fd10, [%sd13 + 512];
	add.rn.f64 %fd2, %fd2, %fd10;
	st.volatile.shared.f64 [%sd13], %fd2;
$main_$ck_L29_2__l11__:
	.loc 1 32 0
	bar.sync 0;
	mov.u32 %u12, %tid.x;
	mov.u32 %u2, %tid.x;                    // $$lcs_10_t50
	setp.ge.u32 %p3, %u12, 32;
	@%p3 bra $main_$ck_L29_2__l14__;
	// threads 0..31: unrolled fold, strides 32,16,8,4,2,1 elements
	// (byte offsets 256..8). Volatile shared accesses keep the loads/stores
	// ordered; no bar.sync between steps — relies on single-warp execution.
	.loc 1 32 0
	cvt.s64.u32 %sd14, %u2;
	shl.b64 %bd4, %sd14, 3;
	mov.s64 %sd15, $$_redfold_t38__f1s221;
	add.s64 %sd16, %bd4, %sd15;
	ld.volatile.shared.f64 %fd11, [%sd16 + 256];
	add.rn.f64 %fd2, %fd2, %fd11;
	st.volatile.shared.f64 [%sd16], %fd2;
	ld.volatile.shared.f64 %fd12, [%sd16 + 128];
	add.rn.f64 %fd2, %fd2, %fd12;
	st.volatile.shared.f64 [%sd16], %fd2;
	ld.volatile.shared.f64 %fd13, [%sd16 + 64];
	add.rn.f64 %fd2, %fd2, %fd13;
	st.volatile.shared.f64 [%sd16], %fd2;
	ld.volatile.shared.f64 %fd14, [%sd16 + 32];
	add.rn.f64 %fd2, %fd2, %fd14;
	st.volatile.shared.f64 [%sd16], %fd2;
	ld.volatile.shared.f64 %fd15, [%sd16 + 16];
	add.rn.f64 %fd2, %fd2, %fd15;
	st.volatile.shared.f64 [%sd16], %fd2;
	ld.volatile.shared.f64 %fd16, [%sd16 + 8];
	add.rn.f64 %fd17, %fd2, %fd16;
	st.volatile.shared.f64 [%sd16], %fd17;
	mov.f64 %fd2, %fd17;                    // $redfold_left_temp_t39
$main_$ck_L29_2__l14__:
	.loc 1 32 0
	bar.sync 0;
	// Block partial sum now lives in redfold[0]; every thread reads it.
	ld.volatile.shared.f64 %fd4, [$$_redfold_t38__f1s221];
	mov.f64 %fd3, %fd4;                     // $redfinal_t41
	bar.sync 0;
	mov.u32 %u13, %tid.x;
	setp.ne.u32 %p4, %u13, 0;
	@%p4 bra $main_$ck_L29_2__l17__;
	// thread 0 only: reduc_val[ctaid.x] = block partial
	.loc 1 32 0
	cvt.s64.u32 %sd17, %ctaid.x;
	shl.b64 %bd5, %sd17, 3;
	ld.param.s64 %sd18, [$$arg_ptr_reduc_val_t18_t59];
	add.s64 %sd19, %sd18, %bd5;
	st.global.f64 [%sd19], %fd3;
$main_$ck_L29_2__l17__:
	//--- elect the last-arriving block via the global control counter --------
	.loc 1 32 0
	membar.gl;                              // partial store visible grid-wide first
	mov.u32 %u14, %tid.x;
	setp.ne.u32 %p5, %u14, 0;
	@%p5 bra $main_$ck_L29_2__l20__;
	// thread 0: block 0 adds (1 - nctaid), other blocks add 1; the counter
	// sums to 0 exactly once all blocks (incl. block 0) have contributed.
	.loc 1 32 0
	cvt.s32.u32 %s2, %nctaid.x;
	sub.s32 %s3, 1, %s2;                    // 1 - gridDim.x
	mov.u32 %u15, %ctaid.x;
	setp.eq.u32 %p6, %u15, 0;
	selp.s32 %s4, %s3, 1, %p6;              // increment for this block
	ld.param.s64 %sd20, [$$arg_ptr_reduc_ctl_t15_t58];
	atom.global.add.s32 %s5, [%sd20], %s4;  // returns the OLD counter value
	add.s32 %s6, %s5, %s4;                  // new counter value
	st.volatile.shared.s32 [reduc_share_ctl_c2__f1s125], %s6; // reduc_share_ctl_c2
$main_$ck_L29_2__l20__:
	.loc 1 32 0
	bar.sync 0;
	ld.volatile.shared.s32 %s7, [reduc_share_ctl_c2__f1s125];
	setp.ne.s32 %p7, %s7, 0;
	@%p7 bra $main_$ck_L29_2__l102__;       // not the last block: done
	//--- stage 2 (last block only): fold all per-block partials --------------
	// Each thread accumulates reduc_val[tid, tid+ntid, ...] up to nctaid.
	.loc 1 32 0
	mov.f64 %fd4, 0d0000000000000000;       // $$reduc_pvt_t17
	cvt.s32.u32 %s1, %tid.x;                // $$induc_p13_t35
	mov.u32 %u16, %nctaid.x;
	mov.u32 %u4, %nctaid.x;                 // $$lis_b2_t37 (loop bound)
	setp.ge.u32 %p8, %s1, %u16;
	@%p8 bra $main_$ck_L29_2__l31__;        // fewer partials than threads
	.loc 1 32 0
	mov.u32 %u3, %ntid.x;                   // $$lis_b1_t36 (stride)
$main_$ck_L29_2__l27__:
	.loc 1 32 0
	cvt.s64.s32 %sd21, %s1;
	shl.b64 %bd6, %sd21, 3;
	ld.param.s64 %sd22, [$$arg_ptr_reduc_val_t18_t59];
	add.s64 %sd23, %sd22, %bd6;
	ld.global.f64 %fd18, [%sd23];           // reduc_val[p]
	add.rn.f64 %fd4, %fd18, %fd4;
	add.u32 %s1, %s1, %u3;                  // p += ntid.x
	setp.lt.u32 %p9, %s1, %u4;
	@%p9 bra $main_$ck_L29_2__l27__;
$main_$ck_L29_2__l31__:
	// Second shared-memory tree fold, identical in shape to stage 1.
	.loc 1 32 0
	mov.u32 %u17, %tid.x;
	mov.u32 %u5, %tid.x;                    // $$lcs_13_t53
	cvt.s64.u32 %sd24, %tid.x;
	shl.b64 %bd7, %sd24, 3;
	mov.s64 %sd25, $$_redfold_t38__f1s221;
	add.s64 %sd26, %bd7, %sd25;
	st.volatile.shared.f64 [%sd26], %fd4;
	bar.sync 0;
	ld.volatile.shared.f64 %fd5, [%sd26];   // $redfold_left_temp_t42
	setp.ge.u32 %p10, %u17, 64;
	@%p10 bra $main_$ck_L29_2__l34__;
	// threads 0..63: fold in element [tid + 64]
	.loc 1 32 0
	cvt.s64.u32 %sd27, %u5;
	shl.b64 %bd8, %sd27, 3;
	mov.s64 %sd28, $$_redfold_t38__f1s221;
	add.s64 %sd29, %bd8, %sd28;
	ld.volatile.shared.f64 %fd19, [%sd29 + 512];
	add.rn.f64 %fd5, %fd5, %fd19;
	st.volatile.shared.f64 [%sd29], %fd5;
$main_$ck_L29_2__l34__:
	.loc 1 32 0
	bar.sync 0;
	mov.u32 %u18, %tid.x;
	mov.u32 %u6, %tid.x;                    // $$lcs_14_t54
	setp.ge.u32 %p11, %u18, 32;
	@%p11 bra $main_$ck_L29_2__l37__;
	// threads 0..31: unrolled warp fold, strides 32,16,8,4,2,1 elements
	.loc 1 32 0
	cvt.s64.u32 %sd30, %u6;
	shl.b64 %bd9, %sd30, 3;
	mov.s64 %sd31, $$_redfold_t38__f1s221;
	add.s64 %sd32, %bd9, %sd31;
	ld.volatile.shared.f64 %fd20, [%sd32 + 256];
	add.rn.f64 %fd5, %fd5, %fd20;
	st.volatile.shared.f64 [%sd32], %fd5;
	ld.volatile.shared.f64 %fd21, [%sd32 + 128];
	add.rn.f64 %fd5, %fd5, %fd21;
	st.volatile.shared.f64 [%sd32], %fd5;
	ld.volatile.shared.f64 %fd22, [%sd32 + 64];
	add.rn.f64 %fd5, %fd5, %fd22;
	st.volatile.shared.f64 [%sd32], %fd5;
	ld.volatile.shared.f64 %fd23, [%sd32 + 32];
	add.rn.f64 %fd5, %fd5, %fd23;
	st.volatile.shared.f64 [%sd32], %fd5;
	ld.volatile.shared.f64 %fd24, [%sd32 + 16];
	add.rn.f64 %fd5, %fd5, %fd24;
	st.volatile.shared.f64 [%sd32], %fd5;
	ld.volatile.shared.f64 %fd25, [%sd32 + 8];
	add.rn.f64 %fd26, %fd5, %fd25;
	st.volatile.shared.f64 [%sd32], %fd26;
	mov.f64 %fd5, %fd26;                    // $redfold_left_temp_t42
$main_$ck_L29_2__l37__:
	.loc 1 32 0
	bar.sync 0;
	// Grand total in redfold[0]; thread 0 accumulates it into *accshare.
	ld.volatile.shared.f64 %fd4, [$$_redfold_t38__f1s221];
	mov.f64 %fd6, %fd4;                     // $redfinal_t44
	bar.sync 0;
	mov.u32 %u19, %tid.x;
	setp.ne.u32 %p12, %u19, 0;
	@%p12 bra $main_$ck_L29_2__l102__;
	.loc 1 32 0
	ld.param.s64 %sd33, [$$arg_ptr_accshare_t21_t60];
	ld.global.f64 %fd27, [%sd33];
	add.rn.f64 %fd28, %fd27, %fd6;          // *accshare += grand total
	st.global.f64 [%sd33], %fd28;
$main_$ck_L29_2__l102__:
	exit;
} // main_$ck_L29_2