; 0-hero's picture
; Add files using upload-large-folder tool
; 79f9b39 verified
; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"
; Externally defined, zero-length byte array in address space 3. The kernel below
; addresses it as an array of float through st.shared/ld.shared inline PTX and uses
; it as scratch space for its cross-warp reductions.
@global_smem = external addrspace(3) global [0 x i8]
; NUL-terminated string "__CUDA_FTZ"; passed to @__nvvm_reflect to choose between the
; .ftz and non-.ftz rsqrt intrinsics.
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
; Triton-generated kernel (the debug info names a TorchInductor-produced .py file as its origin).
;
; Per CTA (index read from %ctaid.x; the module's maxntidx annotation is 64 threads),
; each thread handles 4 consecutive elements, so one CTA covers 256 elements:
;   x    = f32 load from %1  +  bf16->f32 loads from %2, %3 and %4   (element-wise sum)
;   mean = sum(x) / 256
;   var  = sum((x - mean)^2) / 256
;   rstd = rsqrt.approx(var + eps)
;   out  = (x - mean) * rstd * w      with w an f32 load from %5, indexed by the in-CTA offset only
; Stores:
;   %0[ctaid.x]            <- rstd (f32), written by the thread with (tid & 63) == 0
;   %6[ctaid.x]            <- mean (f32), written by the same thread
;   %7[ctaid.x*256 + off]  <- out, rounded to bf16
; %8 and %9 are never referenced in the body. All load/store mask predicates except the
; "single writer" ones are the constant `i1 true`.
; NOTE(review): this has the shape of a fused residual-add + layer-norm without bias,
; over rows of length 256 - confirm against the generating Python source.
define void @triton__0d1d2d3d4d5d6d7d8de9de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %6, ptr addrspace(1) %7, i32 %8, i32 %9) local_unnamed_addr !dbg !7 {
; ---- Thread / CTA indexing ----
; %12 = lane within the warp (tid & 31); %14 = warp index, limited to 0..1 ((tid >> 5) & 1).
; %15 = (tid & 63) * 4 = this thread's first element offset inside the 256-element row.
; %18 = ctaid.x * 256 | %15 = flat element index used for %1, %2, %3, %4 and %7.
%11 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
%12 = and i32 %11, 31, !dbg !10
%13 = lshr i32 %11, 5, !dbg !10
%14 = and i32 %13, 1, !dbg !10
%urem = and i32 %11, 63, !dbg !10
%15 = shl nuw nsw i32 %urem, 2, !dbg !10
%16 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #6, !dbg !11
%17 = shl i32 %16, 8, !dbg !12
%18 = or i32 %17, %15, !dbg !13
%19 = sext i32 %18 to i64, !dbg !14
; ---- Load 4 x f32 from %1 (one 128-bit vector load) ----
; The asm zero-initialises the outputs, performs the predicated load, then applies the
; "other" value (0) under the negated mask; every mask here is `true`.
%20 = getelementptr float, ptr addrspace(1) %1, i64 %19, !dbg !14
%21 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %20, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !15
%22 = extractvalue { i32, i32, i32, i32 } %21, 0, !dbg !15
%23 = extractvalue { i32, i32, i32, i32 } %21, 1, !dbg !15
%24 = extractvalue { i32, i32, i32, i32 } %21, 2, !dbg !15
%25 = extractvalue { i32, i32, i32, i32 } %21, 3, !dbg !15
; Elements 2 and 3 are kept as scalar floats; elements 0 and 1 are packed into a
; <2 x float> further down.
%26 = bitcast i32 %24 to float, !dbg !15
%27 = bitcast i32 %25 to float, !dbg !15
; ---- Load 4 x bf16 from %2 (two 32-bit words) and widen to f32 ----
; Each 32-bit word carries two 16-bit values: low half via trunc, high half via lshr 16 + trunc.
%28 = getelementptr i16, ptr addrspace(1) %2, i64 %19, !dbg !16
%29 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %28, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !17
%30 = extractvalue { i32, i32 } %29, 0, !dbg !17
%31 = extractvalue { i32, i32 } %29, 1, !dbg !17
%32 = trunc i32 %30 to i16, !dbg !17
%extelt.offset = lshr i32 %30, 16, !dbg !17
%33 = trunc i32 %extelt.offset to i16, !dbg !17
%34 = trunc i32 %31 to i16, !dbg !17
%extelt.offset1 = lshr i32 %31, 16, !dbg !17
%35 = trunc i32 %extelt.offset1 to i16, !dbg !17
%36 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %32) #6, !dbg !18
%37 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %33) #6, !dbg !18
%38 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %34) #6, !dbg !18
%39 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %35) #6, !dbg !18
; ---- Same load/unpack/widen sequence for %3 ----
%40 = getelementptr i16, ptr addrspace(1) %3, i64 %19, !dbg !19
%41 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %40, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !20
%42 = extractvalue { i32, i32 } %41, 0, !dbg !20
%43 = extractvalue { i32, i32 } %41, 1, !dbg !20
%44 = trunc i32 %42 to i16, !dbg !20
%extelt.offset2 = lshr i32 %42, 16, !dbg !20
%45 = trunc i32 %extelt.offset2 to i16, !dbg !20
%46 = trunc i32 %43 to i16, !dbg !20
%extelt.offset3 = lshr i32 %43, 16, !dbg !20
%47 = trunc i32 %extelt.offset3 to i16, !dbg !20
%48 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %44) #6, !dbg !21
%49 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %45) #6, !dbg !21
%50 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %46) #6, !dbg !21
%51 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %47) #6, !dbg !21
; ---- Same load/unpack/widen sequence for %4 ----
%52 = getelementptr i16, ptr addrspace(1) %4, i64 %19, !dbg !22
%53 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %52, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !23
%54 = extractvalue { i32, i32 } %53, 0, !dbg !23
%55 = extractvalue { i32, i32 } %53, 1, !dbg !23
%56 = trunc i32 %54 to i16, !dbg !23
%extelt.offset4 = lshr i32 %54, 16, !dbg !23
%57 = trunc i32 %extelt.offset4 to i16, !dbg !23
%58 = trunc i32 %55 to i16, !dbg !23
%extelt.offset5 = lshr i32 %55, 16, !dbg !23
%59 = trunc i32 %extelt.offset5 to i16, !dbg !23
%60 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %56) #6, !dbg !24
%61 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %57) #6, !dbg !24
%62 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %58) #6, !dbg !24
%63 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %59) #6, !dbg !24
; ---- Load 4 x f32 from %5 ----
; Indexed by %15 only (no CTA term), so every CTA reads the same 256 values; the load
; carries the L1::evict_last cache hint. The result (%66) is unpacked after the rsqrt below.
%64 = zext nneg i32 %15 to i64, !dbg !25
%65 = getelementptr float, ptr addrspace(1) %5, i64 %64, !dbg !25
%66 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_last.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %65, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !26
; ---- x = %1 + %2 + %3 + %4, element-wise ----
; Elements 2 and 3 are summed as scalars (%83, %84); elements 0 and 1 as a <2 x float> (%82).
; In both paths the order of additions is ((%1 + %2) + %3) + %4.
%67 = fadd float %38, %26, !dbg !27
%68 = fadd float %39, %27, !dbg !27
%69 = fadd float %67, %50, !dbg !28
%70 = fadd float %68, %51, !dbg !28
%71 = insertelement <2 x i32> poison, i32 %22, i64 0, !dbg !15
%72 = insertelement <2 x i32> %71, i32 %23, i64 1, !dbg !15
%73 = bitcast <2 x i32> %72 to <2 x float>, !dbg !15
%74 = insertelement <2 x float> poison, float %36, i64 0, !dbg !27
%75 = insertelement <2 x float> %74, float %37, i64 1, !dbg !27
%76 = fadd <2 x float> %75, %73, !dbg !27
%77 = insertelement <2 x float> poison, float %48, i64 0, !dbg !28
%78 = insertelement <2 x float> %77, float %49, i64 1, !dbg !28
%79 = fadd <2 x float> %76, %78, !dbg !28
%80 = insertelement <2 x float> poison, float %60, i64 0, !dbg !29
%81 = insertelement <2 x float> %80, float %61, i64 1, !dbg !29
%82 = fadd <2 x float> %79, %81, !dbg !29
%83 = fadd float %69, %62, !dbg !29
%84 = fadd float %70, %63, !dbg !29
; ---- First reduction: sum of x over the whole CTA ----
; Step 1: per-thread sum of its 4 elements (%89).
%85 = extractelement <2 x float> %82, i64 0, !dbg !30
%86 = extractelement <2 x float> %82, i64 1, !dbg !30
%87 = fadd float %85, %86, !dbg !30
%88 = fadd float %87, %83, !dbg !30
%89 = fadd float %88, %84, !dbg !30
; Step 2: butterfly shuffles at XOR distances 16, 8, 4, 2, 1 with a full member mask (-1);
; each step adds the partner lane's partial sum.
%90 = bitcast float %89 to i32, !dbg !36
%91 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %90, i32 16, i32 31), !dbg !36
%92 = bitcast i32 %91 to float, !dbg !36
%93 = fadd float %89, %92, !dbg !30
%94 = bitcast float %93 to i32, !dbg !36
%95 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %94, i32 8, i32 31), !dbg !36
%96 = bitcast i32 %95 to float, !dbg !36
%97 = fadd float %93, %96, !dbg !30
%98 = bitcast float %97 to i32, !dbg !36
%99 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %98, i32 4, i32 31), !dbg !36
%100 = bitcast i32 %99 to float, !dbg !36
%101 = fadd float %97, %100, !dbg !30
%102 = bitcast float %101 to i32, !dbg !36
%103 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %102, i32 2, i32 31), !dbg !36
%104 = bitcast i32 %103 to float, !dbg !36
%105 = fadd float %101, %104, !dbg !30
%106 = bitcast float %105 to i32, !dbg !36
%107 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %106, i32 1, i32 31), !dbg !36
%108 = bitcast i32 %107 to float, !dbg !36
%109 = fadd float %105, %108, !dbg !30
; Step 3: lane 0 of each warp (%110) writes its warp sum to global_smem[warp index] (%112).
%110 = icmp eq i32 %12, 0, !dbg !36
%111 = zext nneg i32 %14 to i64, !dbg !36
%112 = getelementptr float, ptr addrspace(3) @global_smem, i64 %111, !dbg !36
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %112, float %109, i1 %110) #6, !dbg !36
tail call void @llvm.nvvm.barrier0(), !dbg !36
; Step 4: threads with tid < 2 (%113) each read global_smem[tid] and combine the two
; values with one XOR-1 shuffle. The load is predicated, so for other threads %116 is
; not written by the asm; their result is never stored because the store below is
; gated on %123 (tid < 2 and tid even, i.e. tid == 0 for non-negative tid).
%113 = icmp slt i32 %11, 2, !dbg !36
%114 = sext i32 %11 to i64, !dbg !36
%115 = getelementptr float, ptr addrspace(3) @global_smem, i64 %114, !dbg !36
%116 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %115, i1 %113) #6, !dbg !36
%117 = bitcast float %116 to i32, !dbg !36
%118 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %117, i32 1, i32 31), !dbg !36
%119 = bitcast i32 %118 to float, !dbg !36
%120 = fadd float %116, %119, !dbg !30
%121 = and i32 %11, 1, !dbg !36
%122 = icmp eq i32 %121, 0, !dbg !36
%123 = and i1 %113, %122, !dbg !36
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %115, float %120, i1 %123) #6, !dbg !36
tail call void @llvm.nvvm.barrier0(), !dbg !36
; Step 5: every thread reads the CTA-wide total from global_smem[0].
%124 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !36
; The + 0.0 carries a debug location inside triton_helpers.py; it is kept as emitted.
%125 = fadd float %124, 0.000000e+00, !dbg !38
; ---- mean = total / 256 (%126) ----
%126 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %125, float 2.560000e+02) #6, !dbg !42
; ---- Centre and square: d = x - mean (%127..%130), d*d (%131..%134) ----
%127 = fsub float %85, %126, !dbg !43
%128 = fsub float %86, %126, !dbg !43
%129 = fsub float %83, %126, !dbg !43
%130 = fsub float %84, %126, !dbg !43
%131 = fmul float %127, %127, !dbg !44
%132 = fmul float %128, %128, !dbg !44
%133 = fmul float %129, %129, !dbg !44
%134 = fmul float %130, %130, !dbg !44
; This barrier sits between the read of global_smem[0] above and the stores below that
; reuse the same shared slots for the second reduction.
tail call void @llvm.nvvm.barrier0(), !dbg !45
; ---- Second reduction: sum of d*d over the CTA (same scheme as the first) ----
%135 = fadd float %131, %132, !dbg !47
%136 = fadd float %133, %135, !dbg !47
%137 = fadd float %134, %136, !dbg !47
%138 = bitcast float %137 to i32, !dbg !45
%139 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %138, i32 16, i32 31), !dbg !45
%140 = bitcast i32 %139 to float, !dbg !45
%141 = fadd float %137, %140, !dbg !47
%142 = bitcast float %141 to i32, !dbg !45
%143 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %142, i32 8, i32 31), !dbg !45
%144 = bitcast i32 %143 to float, !dbg !45
%145 = fadd float %141, %144, !dbg !47
%146 = bitcast float %145 to i32, !dbg !45
%147 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %146, i32 4, i32 31), !dbg !45
%148 = bitcast i32 %147 to float, !dbg !45
%149 = fadd float %145, %148, !dbg !47
%150 = bitcast float %149 to i32, !dbg !45
%151 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %150, i32 2, i32 31), !dbg !45
%152 = bitcast i32 %151 to float, !dbg !45
%153 = fadd float %149, %152, !dbg !47
%154 = bitcast float %153 to i32, !dbg !45
%155 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %154, i32 1, i32 31), !dbg !45
%156 = bitcast i32 %155 to float, !dbg !45
%157 = fadd float %153, %156, !dbg !47
; Cross-warp stage, reusing the pointers and predicates computed for the first reduction
; (%112/%110 for the per-warp store, %115/%113 for the load, %123 for the final store).
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %112, float %157, i1 %110) #6, !dbg !45
tail call void @llvm.nvvm.barrier0(), !dbg !45
%158 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %115, i1 %113) #6, !dbg !45
%159 = bitcast float %158 to i32, !dbg !45
%160 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %159, i32 1, i32 31), !dbg !45
%161 = bitcast i32 %160 to float, !dbg !45
%162 = fadd float %158, %161, !dbg !47
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %115, float %162, i1 %123) #6, !dbg !45
tail call void @llvm.nvvm.barrier0(), !dbg !45
%163 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !45
%164 = fadd float %163, 0.000000e+00, !dbg !50
; ---- var = total / 256; add eps ----
; 0x3EE4F8B580000000 is the f32 value nearest 1e-5, printed in LLVM's double-hex notation.
%165 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %164, float 2.560000e+02) #6, !dbg !52
%166 = fadd float %165, 0x3EE4F8B580000000, !dbg !53
; ---- rstd = rsqrt(var + eps) ----
; Same body as @__nv_rsqrtf in this module: a non-zero result from
; __nvvm_reflect("__CUDA_FTZ") selects the .ftz intrinsic, zero selects the plain one.
%167 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !54
%.not.i = icmp eq i32 %167, 0, !dbg !54
br i1 %.not.i, label %170, label %168, !dbg !54
168: ; preds = %10
%169 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %166), !dbg !54
br label %__nv_rsqrtf.exit, !dbg !54
170: ; preds = %10
%171 = tail call float @llvm.nvvm.rsqrt.approx.f(float %166), !dbg !54
br label %__nv_rsqrtf.exit, !dbg !54
__nv_rsqrtf.exit: ; preds = %168, %170
%.0.i = phi float [ %169, %168 ], [ %171, %170 ], !dbg !54
; ---- Unpack the 4 values loaded from %5 (extracted in reverse order 3, 2, 1, 0) ----
%172 = extractvalue { i32, i32, i32, i32 } %66, 3, !dbg !26
%173 = bitcast i32 %172 to float, !dbg !26
%174 = extractvalue { i32, i32, i32, i32 } %66, 2, !dbg !26
%175 = bitcast i32 %174 to float, !dbg !26
%176 = extractvalue { i32, i32, i32, i32 } %66, 1, !dbg !26
%177 = bitcast i32 %176 to float, !dbg !26
%178 = extractvalue { i32, i32, i32, i32 } %66, 0, !dbg !26
%179 = bitcast i32 %178 to float, !dbg !26
; ---- out = (x - mean) * rstd * w, element i paired with w[i] ----
%180 = fmul float %127, %.0.i, !dbg !55
%181 = fmul float %128, %.0.i, !dbg !55
%182 = fmul float %129, %.0.i, !dbg !55
%183 = fmul float %130, %.0.i, !dbg !55
%184 = fmul float %180, %179, !dbg !56
%185 = fmul float %181, %177, !dbg !56
%186 = fmul float %182, %175, !dbg !56
%187 = fmul float %183, %173, !dbg !56
; CTA-wide barrier ahead of the global stores.
tail call void @llvm.nvvm.barrier0(), !dbg !57
; ---- Store rstd to %0[ctaid.x]; only the thread with (tid & 63) == 0 (%190) writes ----
%188 = sext i32 %16 to i64, !dbg !58
%189 = getelementptr float, ptr addrspace(1) %0, i64 %188, !dbg !58
%190 = icmp eq i32 %urem, 0, !dbg !59
%191 = bitcast float %.0.i to i32, !dbg !59
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %191, ptr addrspace(1) %189, i1 %190) #6, !dbg !59
; ---- Round out to bf16 (cvt.rn), pack pairs into 32-bit words, store 2 words to %7 ----
; Element 0 / 2 goes in vector lane 0 and element 1 / 3 in lane 1 of each packed word.
%192 = getelementptr i16, ptr addrspace(1) %7, i64 %19, !dbg !60
%193 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %184) #6, !dbg !61
%194 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %185) #6, !dbg !61
%195 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %186) #6, !dbg !61
%196 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %187) #6, !dbg !61
%197 = insertelement <2 x i16> undef, i16 %193, i64 0, !dbg !61
%198 = insertelement <2 x i16> %197, i16 %194, i64 1, !dbg !61
%199 = bitcast <2 x i16> %198 to i32, !dbg !61
%200 = insertelement <2 x i16> undef, i16 %195, i64 0, !dbg !61
%201 = insertelement <2 x i16> %200, i16 %196, i64 1, !dbg !61
%202 = bitcast <2 x i16> %201 to i32, !dbg !61
tail call void asm sideeffect "@$3 st.global.v2.b32 [ $2 + 0 ], { $0, $1 };", "r,r,l,b"(i32 %199, i32 %202, ptr addrspace(1) %192, i1 true) #6, !dbg !61
; ---- Store mean to %6[ctaid.x], same single-writer predicate as the rstd store ----
%203 = getelementptr float, ptr addrspace(1) %6, i64 %188, !dbg !62
%204 = bitcast float %126 to i32, !dbg !63
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %204, ptr addrspace(1) %203, i1 %190) #6, !dbg !63
ret void, !dbg !64
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
; Function Attrs: convergent nocallback nounwind
declare void @llvm.nvvm.barrier0() #2
; Function Attrs: alwaysinline nounwind
; float __nv_rsqrtf(float x)
; Returns the approximate reciprocal square root of %x using one of two NVVM intrinsics.
; The variant is chosen by querying __nvvm_reflect with the "__CUDA_FTZ" string (@.str):
; a non-zero answer takes the .ftz intrinsic, zero takes the plain one.
define float @__nv_rsqrtf(float %x) local_unnamed_addr #3 {
entry:
  %ftz.query = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6
  %ftz.is.off = icmp eq i32 %ftz.query, 0
  br i1 %ftz.is.off, label %rsqrt.noftz, label %rsqrt.ftz

rsqrt.ftz:                                        ; preds = %entry
  %val.ftz = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
  br label %merge

rsqrt.noftz:                                      ; preds = %entry
  %val.noftz = tail call float @llvm.nvvm.rsqrt.approx.f(float %x)
  br label %merge

merge:                                            ; preds = %rsqrt.noftz, %rsqrt.ftz
  %result = phi float [ %val.ftz, %rsqrt.ftz ], [ %val.noftz, %rsqrt.noftz ]
  ret float %result
}
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare float @llvm.nvvm.rsqrt.approx.ftz.f(float) #5
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare float @llvm.nvvm.rsqrt.approx.f(float) #5
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
attributes #2 = { convergent nocallback nounwind }
attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { nounwind }
; ---- Module-level named metadata ----
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
; The annotation list references !4 and !5 twice each, as emitted.
!nvvm.annotations = !{!4, !5, !5, !4}
!llvm.ident = !{!6}
!0 = !{i32 2, !"Debug Info Version", i32 3}
; NOTE(review): presumably the value the NVVMReflect pass substitutes for
; __nvvm_reflect("__CUDA_FTZ"), which would make the .ftz rsqrt path the live one - confirm
; against the NVPTX backend documentation.
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!3 = !DIFile(filename: "cdxa5yqgsimvskocpuiz4ajfrjfcwys3opyrdv53xfphj4576qx7.py", directory: "/tmp/torchinductor_root/dx")
; Marks @triton__0d1d2d3d4d5d6d7d8de9de as a kernel entry point.
!4 = !{ptr @triton__0d1d2d3d4d5d6d7d8de9de, !"kernel", i32 1}
; maxntidx = 64 for the kernel; the kernel body masks tid.x with 63 and uses a warp index of 0..1.
!5 = !{ptr @triton__0d1d2d3d4d5d6d7d8de9de, !"maxntidx", i32 64}
; NOTE(review): this ident string does not match the "triton" producer above; it presumably
; arrived with linked-in device library code such as @__nv_rsqrtf - confirm.
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6d7d8de9de", linkageName: "triton__0d1d2d3d4d5d6d7d8de9de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
!9 = !{}
!10 = !DILocation(line: 26, column: 26, scope: !7)
!11 = !DILocation(line: 23, column: 28, scope: !7)
!12 = !DILocation(line: 30, column: 40, scope: !7)
!13 = !DILocation(line: 30, column: 36, scope: !7)
!14 = !DILocation(line: 30, column: 30, scope: !7)
!15 = !DILocation(line: 30, column: 46, scope: !7)
!16 = !DILocation(line: 31, column: 30, scope: !7)
!17 = !DILocation(line: 31, column: 46, scope: !7)
!18 = !DILocation(line: 31, column: 67, scope: !7)
!19 = !DILocation(line: 32, column: 30, scope: !7)
!20 = !DILocation(line: 32, column: 46, scope: !7)
!21 = !DILocation(line: 32, column: 67, scope: !7)
!22 = !DILocation(line: 33, column: 30, scope: !7)
!23 = !DILocation(line: 33, column: 46, scope: !7)
!24 = !DILocation(line: 33, column: 67, scope: !7)
!25 = !DILocation(line: 34, column: 31, scope: !7)
!26 = !DILocation(line: 34, column: 36, scope: !7)
!27 = !DILocation(line: 36, column: 18, scope: !7)
!28 = !DILocation(line: 38, column: 18, scope: !7)
!29 = !DILocation(line: 40, column: 18, scope: !7)
!30 = !DILocation(line: 233, column: 15, scope: !31, inlinedAt: !34)
!31 = distinct !DILexicalBlockFile(scope: !33, file: !32, discriminator: 0)
!32 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
!33 = distinct !DILexicalBlockFile(scope: !7, file: !32, discriminator: 0)
!34 = !DILocation(line: 243, column: 36, scope: !31, inlinedAt: !35)
!35 = !DILocation(line: 45, column: 59, scope: !31)
!36 = !DILocation(line: 243, column: 36, scope: !33, inlinedAt: !37)
!37 = !DILocation(line: 45, column: 59, scope: !33)
!38 = !DILocation(line: 8, column: 15, scope: !39, inlinedAt: !41)
!39 = distinct !DILexicalBlockFile(scope: !7, file: !40, discriminator: 0)
!40 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
!41 = !DILocation(line: 45, column: 45, scope: !39)
!42 = !DILocation(line: 48, column: 20, scope: !7)
!43 = !DILocation(line: 49, column: 20, scope: !7)
!44 = !DILocation(line: 50, column: 20, scope: !7)
!45 = !DILocation(line: 243, column: 36, scope: !33, inlinedAt: !46)
!46 = !DILocation(line: 53, column: 59, scope: !33)
!47 = !DILocation(line: 233, column: 15, scope: !31, inlinedAt: !48)
!48 = !DILocation(line: 243, column: 36, scope: !31, inlinedAt: !49)
!49 = !DILocation(line: 53, column: 59, scope: !31)
!50 = !DILocation(line: 8, column: 15, scope: !39, inlinedAt: !51)
!51 = !DILocation(line: 53, column: 45, scope: !39)
!52 = !DILocation(line: 55, column: 20, scope: !7)
!53 = !DILocation(line: 57, column: 20, scope: !7)
!54 = !DILocation(line: 58, column: 26, scope: !7)
!55 = !DILocation(line: 60, column: 20, scope: !7)
!56 = !DILocation(line: 61, column: 20, scope: !7)
!57 = !DILocation(line: 63, column: 4, scope: !7)
!58 = !DILocation(line: 64, column: 28, scope: !7)
!59 = !DILocation(line: 64, column: 40, scope: !7)
!60 = !DILocation(line: 65, column: 25, scope: !7)
!61 = !DILocation(line: 65, column: 48, scope: !7)
!62 = !DILocation(line: 66, column: 25, scope: !7)
!63 = !DILocation(line: 66, column: 37, scope: !7)
!64 = !DILocation(line: 66, column: 4, scope: !7)