//
// Generated by LLVM NVPTX Back-End
//
// NOTE: line structure matters in this file. PTX `//` comments extend to the
// end of the line, so every directive and instruction below must stay on its
// own line; otherwise the header comment swallows the rest of the module.
//

.version 8.2
.target sm_89
.address_size 64

    // .globl triton__0d1d2de

// -----------------------------------------------------------------------------
// Kernel: triton__0d1d2de
//
// Each thread loads 8 consecutive 32-bit values from the buffer at param_0,
// converts each one from f32 to bf16 (round-to-nearest-even), and writes the
// 8 resulting 16-bit values to the buffer at param_1.
//
// Parameters
//   param_0 (.u64)  base address of the 32-bit source elements
//   param_1 (.u64)  base address of the 16-bit destination elements
//   param_2 (.u32)  declared but never loaded by this kernel
//                   (presumably the element count -- confirm against the
//                   generating Python source)
//
// Indexing
//   first element = (%ctaid.x << 10) | ((%tid.x << 3) & 1016)
//   i.e. each block covers 1024 elements and each thread covers 8 of them.
//
// Notes
//   * %p1 is set to true and guards every memory access, so there is no
//     bounds masking in this kernel.
//   * The v4.b32 accesses need 16-byte-aligned addresses. The byte offsets
//     computed here are multiples of 16, so this relies on the two base
//     pointers being 16-byte aligned -- confirm with the launcher.
// -----------------------------------------------------------------------------
.visible .entry triton__0d1d2de(
    .param .u64 triton__0d1d2de_param_0,
    .param .u64 triton__0d1d2de_param_1,
    .param .u32 triton__0d1d2de_param_2
)
.maxntid 128, 1, 1
{
    .reg .pred %p<4>;
    .reg .b16 %rs<9>;
    .reg .b32 %r<31>;
    .reg .b64 %rd<8>;
    .loc 1 18 0
$L__func_begin0:
    .loc 1 18 0

    // rd4 = source base, rd5 = destination base
    ld.param.u64 %rd4, [triton__0d1d2de_param_0];
    ld.param.u64 %rd5, [triton__0d1d2de_param_1];
$L__tmp0:
    .loc 1 21 36
    // r24 = (tid.x * 8) & 1016 : first element of this thread within the block
    mov.u32 %r22, %tid.x;
    shl.b32 %r23, %r22, 3;
    and.b32 %r24, %r23, 1016;
    .loc 1 20 28
    mov.u32 %r1, %ctaid.x;
    .loc 1 20 33
    // r25 = ctaid.x * 1024 : first element of this block
    shl.b32 %r25, %r1, 10;
    .loc 1 21 23
    // r26 = global index of this thread's first element
    or.b32 %r26, %r25, %r24;
    .loc 1 24 30
    // rd1 = source + index * 4 (elements 0..3); rd2 = rd1 + 16 (elements 4..7)
    mul.wide.s32 %rd6, %r26, 4;
    add.s64 %rd1, %rd4, %rd6;
    add.s64 %rd2, %rd1, 16;
    // p1 = true: the predicated accesses below always execute
    mov.pred %p1, -1;
    .loc 1 24 35
    // Destination registers are zeroed before each predicated load
    mov.u32 %r10, 0x0;
    mov.u32 %r11, 0x0;
    mov.u32 %r12, 0x0;
    mov.u32 %r13, 0x0;
    @%p1 ld.global.v4.b32 { %r10, %r11, %r12, %r13 }, [ %rd1 + 0 ];
    mov.u32 %r14, 0x0;
    mov.u32 %r15, 0x0;
    mov.u32 %r16, 0x0;
    mov.u32 %r17, 0x0;
    @%p1 ld.global.v4.b32 { %r14, %r15, %r16, %r17 }, [ %rd2 + 0 ];
    .loc 1 26 25
    // rd3 = destination + index * 2 (16-bit output elements)
    mul.wide.s32 %rd7, %r26, 2;
    add.s64 %rd3, %rd5, %rd7;
    .loc 1 26 36
    // Convert the 8 loaded values f32 -> bf16, round-to-nearest-even
    cvt.rn.bf16.f32 %rs1, %r10;
    cvt.rn.bf16.f32 %rs2, %r11;
    cvt.rn.bf16.f32 %rs3, %r12;
    cvt.rn.bf16.f32 %rs4, %r13;
    cvt.rn.bf16.f32 %rs5, %r14;
    cvt.rn.bf16.f32 %rs6, %r15;
    cvt.rn.bf16.f32 %rs7, %r16;
    cvt.rn.bf16.f32 %rs8, %r17;
    // Pack pairs of 16-bit results into 32-bit registers (first operand = low half)
    mov.b32 %r27, {%rs1, %rs2};
    mov.b32 %r28, {%rs3, %rs4};
    mov.b32 %r29, {%rs5, %rs6};
    mov.b32 %r30, {%rs7, %rs8};
    // One 16-byte store writes all 8 bf16 results
    @%p1 st.global.v4.b32 [ %rd3 + 0 ], { %r27, %r28, %r29, %r30 };
    .loc 1 26 4
    ret;
$L__tmp1:
$L__func_end0:

}
    .file 1 "/tmp/torchinductor_root/5t/c5tryp5qwkhreijk7s5x327wofz54lwj4kvctuqdzv2vrf2xyons.py"
// -----------------------------------------------------------------------------
// DWARF abbreviation table (two abbreviations: compile unit and subprogram)
// -----------------------------------------------------------------------------
    .section .debug_abbrev
    {
.b8 1                                   // Abbreviation code 1
.b8 17                                  // DW_TAG_compile_unit
.b8 1                                   // DW_CHILDREN_yes
.b8 37                                  // DW_AT_producer
.b8 8                                   // DW_FORM_string
.b8 19                                  // DW_AT_language
.b8 5                                   // DW_FORM_data2
.b8 3                                   // DW_AT_name
.b8 8                                   // DW_FORM_string
.b8 16                                  // DW_AT_stmt_list
.b8 6                                   // DW_FORM_data4
.b8 27                                  // DW_AT_comp_dir
.b8 8                                   // DW_FORM_string
.b8 180                                 // DW_AT_GNU_pubnames (ULEB128, byte 1 of 2)
.b8 66                                  //                    (ULEB128, byte 2 of 2)
.b8 12                                  // DW_FORM_flag
.b8 17                                  // DW_AT_low_pc
.b8 1                                   // DW_FORM_addr
.b8 18                                  // DW_AT_high_pc
.b8 1                                   // DW_FORM_addr
.b8 0                                   // End of attribute list
.b8 0
.b8 2                                   // Abbreviation code 2
.b8 46                                  // DW_TAG_subprogram
.b8 0                                   // DW_CHILDREN_no
.b8 17                                  // DW_AT_low_pc
.b8 1                                   // DW_FORM_addr
.b8 18                                  // DW_AT_high_pc
.b8 1                                   // DW_FORM_addr
.b8 64                                  // DW_AT_frame_base
.b8 10                                  // DW_FORM_block1
.b8 135                                 // DW_AT_MIPS_linkage_name (ULEB128, byte 1 of 2)
.b8 64                                  //                         (ULEB128, byte 2 of 2)
.b8 8                                   // DW_FORM_string
.b8 3                                   // DW_AT_name
.b8 8                                   // DW_FORM_string
.b8 58                                  // DW_AT_decl_file
.b8 11                                  // DW_FORM_data1
.b8 59                                  // DW_AT_decl_line
.b8 11                                  // DW_FORM_data1
.b8 63                                  // DW_AT_external
.b8 12                                  // DW_FORM_flag
.b8 0                                   // End of attribute list
.b8 0
.b8 0                                   // End of abbreviation table
    }
// -----------------------------------------------------------------------------
// DWARF compile unit. The lengths and offsets below (176, and 180 / 125 in
// .debug_pubnames) depend on the exact byte count of this section: do not add
// or remove bytes without recomputing them.
// -----------------------------------------------------------------------------
    .section .debug_info
    {
.b32 176                                // Length of unit (bytes following this field)
.b8 2                                   // DWARF version 2
.b8 0
.b32 .debug_abbrev                      // Offset into abbreviation table
.b8 8                                   // Address size in bytes
.b8 1                                   // Abbrev 1: DW_TAG_compile_unit
// DW_AT_producer = "triton"
.b8 116
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 0
// DW_AT_language = 0x0002 (DW_LANG_C)
.b8 2
.b8 0
// DW_AT_name = "c5tryp5qwkhreijk7s5x327wofz54lwj4kvctuqdzv2vrf2xyons.py"
.b8 99
.b8 53
.b8 116
.b8 114
.b8 121
.b8 112
.b8 53
.b8 113
.b8 119
.b8 107
.b8 104
.b8 114
.b8 101
.b8 105
.b8 106
.b8 107
.b8 55
.b8 115
.b8 53
.b8 120
.b8 51
.b8 50
.b8 55
.b8 119
.b8 111
.b8 102
.b8 122
.b8 53
.b8 52
.b8 108
.b8 119
.b8 106
.b8 52
.b8 107
.b8 118
.b8 99
.b8 116
.b8 117
.b8 113
.b8 100
.b8 122
.b8 118
.b8 50
.b8 118
.b8 114
.b8 102
.b8 50
.b8 120
.b8 121
.b8 111
.b8 110
.b8 115
.b8 46
.b8 112
.b8 121
.b8 0
.b32 .debug_line                        // DW_AT_stmt_list
// DW_AT_comp_dir = "/tmp/torchinductor_root/5t"
.b8 47
.b8 116
.b8 109
.b8 112
.b8 47
.b8 116
.b8 111
.b8 114
.b8 99
.b8 104
.b8 105
.b8 110
.b8 100
.b8 117
.b8 99
.b8 116
.b8 111
.b8 114
.b8 95
.b8 114
.b8 111
.b8 111
.b8 116
.b8 47
.b8 53
.b8 116
.b8 0
.b8 1                                   // DW_AT_GNU_pubnames = true
.b64 $L__func_begin0                    // DW_AT_low_pc
.b64 $L__func_end0                      // DW_AT_high_pc
.b8 2                                   // Abbrev 2: DW_TAG_subprogram
.b64 $L__func_begin0                    // DW_AT_low_pc
.b64 $L__func_end0                      // DW_AT_high_pc
.b8 1                                   // DW_AT_frame_base: block length 1
.b8 156                                 //   DW_OP_call_frame_cfa
// DW_AT_MIPS_linkage_name = "triton__0d1d2de"
.b8 116
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 95
.b8 95
.b8 48
.b8 100
.b8 49
.b8 100
.b8 50
.b8 100
.b8 101
.b8 0
// DW_AT_name = "triton__0d1d2de"
.b8 116
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 95
.b8 95
.b8 48
.b8 100
.b8 49
.b8 100
.b8 50
.b8 100
.b8 101
.b8 0
.b8 1                                   // DW_AT_decl_file = 1
.b8 18                                  // DW_AT_decl_line = 18
.b8 1                                   // DW_AT_external = true
.b8 0                                   // End of children of the compile unit
    }
// -----------------------------------------------------------------------------
// Public names index: maps "triton__0d1d2de" to its DIE offset in .debug_info
// -----------------------------------------------------------------------------
    .section .debug_pubnames
    {
.b32 $L__pubNames_end0-$L__pubNames_start0 // Length of this table
$L__pubNames_start0:
.b8 2                                   // Version 2
.b8 0
.b32 .debug_info                        // Offset of the compile unit
.b32 180                                // Total size of the compile unit
.b32 125                                // DIE offset of the subprogram
// External name = "triton__0d1d2de"
.b8 116
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 95
.b8 95
.b8 48
.b8 100
.b8 49
.b8 100
.b8 50
.b8 100
.b8 101
.b8 0
.b32 0                                  // End mark
$L__pubNames_end0:
    }
// -----------------------------------------------------------------------------
// Public types index (contains no entries)
// -----------------------------------------------------------------------------
    .section .debug_pubtypes
    {
.b32 $L__pubTypes_end0-$L__pubTypes_start0 // Length of this table
$L__pubTypes_start0:
.b8 2                                   // Version 2
.b8 0
.b32 .debug_info                        // Offset of the compile unit
.b32 180                                // Total size of the compile unit
.b32 0                                  // End mark
$L__pubTypes_end0:
    }
    .section .debug_loc { }