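; NOTE: the following three comment lines are hosting-page residue captured with this file; they are not LLVM IR.
; 0-hero's picture
; Add files using upload-large-folder tool
; f67f72f verified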
; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"
; Kernel @triton__0d1d2d3d4d5d6d7de8 (producer "triton" per the DICompileUnit; tagged
; "kernel" with "maxntidx" 128 in !nvvm.annotations).
;
; Arguments, as the body uses them:
;   %0      global ptr, read as f32 at element index row*50257 + col (both loops)
;   %1      global ptr, one i64 read per row
;   %2      global ptr, a single f32 is read (dividend of the per-row scale)
;   %3      global ptr, a single f32 is read (divisor of the per-row scale)
;   %4      global ptr, 16-bit elements read and converted bf16 -> f32
;   %5      global ptr, 16-bit elements read and converted bf16 -> f32
;   %6      global ptr, 16-bit elements written after f32 -> bf16 conversion
;   %7, %8  i64, not referenced anywhere in the body
;
; Work split: a CTA covers 64 consecutive rows (ctaid.x << 6). A thread owns four of
; them, (tid.x >> 3) & 15 plus 0/16/32/48, and visits the columns congruent to
; tid.x & 7 modulo 8. Columns are bounded by the constant 50257.
;
; Computation per (row, col):
;   scale  = (i64 from %1 == -1) ? 0.0 : (*%2 / *%3)
;   rowsum = sum over col of scale * %0[row, col]            (first loop + shuffles)
;   %6[row, col] = bf16( f32(%4[row, col]) + scale * %0[row, col]
;                        - rowsum * exp(f32(%5[row, col])) ) (second loop)
; NOTE(review): this has the shape of a log-softmax / cross-entropy backward fused with
; an add, with -1 acting as an "ignore" marker -- confirm against the generating Python.
define void @triton__0d1d2d3d4d5d6d7de8(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %6, i64 %7, i64 %8) local_unnamed_addr !dbg !5 {
; Entry block (implicit label %9).
; Row slots owned by this thread: %12 = (tid.x >> 3) & 15, then the same value with
; bit 4 / bit 5 / both set (i.e. +16, +32, +48, since %12 < 16).
%10 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !8
%11 = lshr i32 %10, 3, !dbg !8
%12 = and i32 %11, 15, !dbg !8
%13 = or i32 %12, 16, !dbg !8
%14 = or i32 %12, 32, !dbg !8
%15 = or i32 %12, 48, !dbg !8
; Column lane of this thread: tid.x & 7.
%16 = and i32 %10, 7, !dbg !9
; First row of this CTA: ctaid.x * 64.
%17 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #2, !dbg !10
%18 = sext i32 %17 to i64, !dbg !11
%19 = shl nsw i64 %18, 6, !dbg !12
%20 = zext nneg i32 %12 to i64
%21 = zext nneg i32 %13 to i64
%22 = zext nneg i32 %14 to i64
%23 = zext nneg i32 %15 to i64
; Global row indices. The CTA base has its low 6 bits clear, so OR acts as addition.
%24 = or i64 %19, %20, !dbg !13
%25 = or i64 %19, %21, !dbg !13
%26 = or i64 %19, %22, !dbg !13
%27 = or i64 %19, %23, !dbg !13
; One i64 per row from %1. The predicate operand is the constant true, so these loads
; are not bounds-guarded on the row index.
%28 = getelementptr i64, ptr addrspace(1) %1, i64 %24, !dbg !14
%29 = getelementptr i64, ptr addrspace(1) %1, i64 %25, !dbg !14
%30 = getelementptr i64, ptr addrspace(1) %1, i64 %26, !dbg !14
%31 = getelementptr i64, ptr addrspace(1) %1, i64 %27, !dbg !14
%32 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %28, i1 true) #2, !dbg !15
%33 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %29, i1 true) #2, !dbg !15
%34 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %30, i1 true) #2, !dbg !15
%35 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %31, i1 true) #2, !dbg !15
; Two scalar f32 values, read from element 0 of %2 and %3 (same for every thread).
%36 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %2, i1 true) #2, !dbg !16
%37 = bitcast i32 %36 to float, !dbg !16
%38 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %3, i1 true) #2, !dbg !17
%39 = bitcast i32 %38 to float, !dbg !17
; Element offset of each row start: row * 50257.
%40 = mul nsw i64 %24, 50257, !dbg !18
%41 = mul nsw i64 %25, 50257, !dbg !18
%42 = mul nsw i64 %26, 50257, !dbg !18
%43 = mul nsw i64 %27, 50257, !dbg !18
; Per-row flag: the i64 read from %1 equals -1.
%44 = insertelement <4 x i64> poison, i64 %32, i64 0, !dbg !19
%45 = insertelement <4 x i64> %44, i64 %33, i64 1, !dbg !19
%46 = insertelement <4 x i64> %45, i64 %34, i64 2, !dbg !19
%47 = insertelement <4 x i64> %46, i64 %35, i64 3, !dbg !19
%48 = icmp eq <4 x i64> %47, <i64 -1, i64 -1, i64 -1, i64 -1>, !dbg !19
; Quotient of the two scalars, broadcast to four lanes; rows flagged above get 0.0.
%49 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %37, float %39) #2, !dbg !20
%50 = insertelement <4 x float> poison, float %49, i64 0, !dbg !21
%51 = shufflevector <4 x float> %50, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !21
%52 = select <4 x i1> %48, <4 x float> zeroinitializer, <4 x float> %51, !dbg !21
; Row base pointers into %0, hoisted out of the first loop.
%53 = getelementptr float, ptr addrspace(1) %0, i64 %40
%54 = getelementptr float, ptr addrspace(1) %0, i64 %41
%55 = getelementptr float, ptr addrspace(1) %0, i64 %42
%56 = getelementptr float, ptr addrspace(1) %0, i64 %43
br label %57, !dbg !22
; ---- Loop 1: per-thread partial sums of scale * %0[row, col] ----
; %58 is the column base (0, 8, 16, ...); %59 carries the four row accumulators.
57: ; preds = %9, %57
%58 = phi i32 [ 0, %9 ], [ %81, %57 ]
%59 = phi <4 x float> [ zeroinitializer, %9 ], [ %80, %57 ]
; Column for this thread (%58 is a multiple of 8 and %16 < 8, so OR acts as addition)
; and its in-range mask col < 50257.
%60 = or i32 %58, %16, !dbg !23
%61 = zext nneg i32 %60 to i64, !dbg !23
%62 = icmp ult i32 %60, 50257, !dbg !24
%63 = getelementptr float, ptr addrspace(1) %53, i64 %61, !dbg !25
%64 = getelementptr float, ptr addrspace(1) %54, i64 %61, !dbg !25
%65 = getelementptr float, ptr addrspace(1) %55, i64 %61, !dbg !25
%66 = getelementptr float, ptr addrspace(1) %56, i64 %61, !dbg !25
; Predicated 32-bit loads: the result is 0 when the mask is false.
%67 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %63, i1 %62, i32 0, i1 %62) #2, !dbg !26
%68 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %64, i1 %62, i32 0, i1 %62) #2, !dbg !26
%69 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %65, i1 %62, i32 0, i1 %62) #2, !dbg !26
%70 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %66, i1 %62, i32 0, i1 %62) #2, !dbg !26
%71 = insertelement <4 x i32> poison, i32 %67, i64 0, !dbg !26
%72 = insertelement <4 x i32> %71, i32 %68, i64 1, !dbg !26
%73 = insertelement <4 x i32> %72, i32 %69, i64 2, !dbg !26
%74 = insertelement <4 x i32> %73, i32 %70, i64 3, !dbg !26
%75 = bitcast <4 x i32> %74 to <4 x float>, !dbg !26
%76 = fmul <4 x float> %52, %75, !dbg !27
; Masked-off columns contribute -0.0, which leaves the accumulator unchanged under fadd.
%77 = insertelement <4 x i1> poison, i1 %62, i64 0, !dbg !28
%78 = shufflevector <4 x i1> %77, <4 x i1> poison, <4 x i32> zeroinitializer, !dbg !28
%79 = select <4 x i1> %78, <4 x float> %76, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, !dbg !28
%80 = fadd <4 x float> %59, %79, !dbg !28
; Advance by 8. The exit test uses the pre-increment base, so the final iteration runs
; with %58 = 50256 and relies on the mask above for columns >= 50257.
%81 = add nuw nsw i32 %58, 8, !dbg !22
%82 = icmp ult i32 %58, 50249, !dbg !22
br i1 %82, label %57, label %83, !dbg !22
; ---- Cross-lane reduction ----
; For each of the four rows, three butterfly shuffles at XOR distances 4, 2 and 1 (member
; mask -1, clamp 31) followed by fadd combine the partial sums of the 8 lanes that share
; a row, leaving the full row sum in every one of them.
83: ; preds = %57
; Row slot 0 -> %96.
%84 = extractelement <4 x float> %80, i64 0, !dbg !29
%85 = bitcast float %84 to i32, !dbg !29
%86 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %85, i32 4, i32 31), !dbg !29
%87 = bitcast i32 %86 to float, !dbg !29
%88 = fadd float %84, %87, !dbg !33
%89 = bitcast float %88 to i32, !dbg !29
%90 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %89, i32 2, i32 31), !dbg !29
%91 = bitcast i32 %90 to float, !dbg !29
%92 = fadd float %88, %91, !dbg !33
%93 = bitcast float %92 to i32, !dbg !29
%94 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %93, i32 1, i32 31), !dbg !29
%95 = bitcast i32 %94 to float, !dbg !29
%96 = fadd float %92, %95, !dbg !33
; Row slot 1 -> %109.
%97 = extractelement <4 x float> %80, i64 1, !dbg !29
%98 = bitcast float %97 to i32, !dbg !29
%99 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %98, i32 4, i32 31), !dbg !29
%100 = bitcast i32 %99 to float, !dbg !29
%101 = fadd float %97, %100, !dbg !33
%102 = bitcast float %101 to i32, !dbg !29
%103 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %102, i32 2, i32 31), !dbg !29
%104 = bitcast i32 %103 to float, !dbg !29
%105 = fadd float %101, %104, !dbg !33
%106 = bitcast float %105 to i32, !dbg !29
%107 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %106, i32 1, i32 31), !dbg !29
%108 = bitcast i32 %107 to float, !dbg !29
%109 = fadd float %105, %108, !dbg !33
; Row slot 2 -> %122.
%110 = extractelement <4 x float> %80, i64 2, !dbg !29
%111 = bitcast float %110 to i32, !dbg !29
%112 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %111, i32 4, i32 31), !dbg !29
%113 = bitcast i32 %112 to float, !dbg !29
%114 = fadd float %110, %113, !dbg !33
%115 = bitcast float %114 to i32, !dbg !29
%116 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %115, i32 2, i32 31), !dbg !29
%117 = bitcast i32 %116 to float, !dbg !29
%118 = fadd float %114, %117, !dbg !33
%119 = bitcast float %118 to i32, !dbg !29
%120 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %119, i32 1, i32 31), !dbg !29
%121 = bitcast i32 %120 to float, !dbg !29
%122 = fadd float %118, %121, !dbg !33
; Row slot 3 -> %135.
%123 = extractelement <4 x float> %80, i64 3, !dbg !29
%124 = bitcast float %123 to i32, !dbg !29
%125 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %124, i32 4, i32 31), !dbg !29
%126 = bitcast i32 %125 to float, !dbg !29
%127 = fadd float %123, %126, !dbg !33
%128 = bitcast float %127 to i32, !dbg !29
%129 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %128, i32 2, i32 31), !dbg !29
%130 = bitcast i32 %129 to float, !dbg !29
%131 = fadd float %127, %130, !dbg !33
%132 = bitcast float %131 to i32, !dbg !29
%133 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %132, i32 1, i32 31), !dbg !29
%134 = bitcast i32 %133 to float, !dbg !29
%135 = fadd float %131, %134, !dbg !33
; Scalar copies of the four per-row scales for use in the second loop.
%136 = extractelement <4 x float> %52, i64 0, !dbg !37
%137 = extractelement <4 x float> %52, i64 1, !dbg !37
%138 = extractelement <4 x float> %52, i64 2, !dbg !37
%139 = extractelement <4 x float> %52, i64 3, !dbg !37
br label %140, !dbg !38
; ---- Loop 2: elementwise output ----
; Same column walk as loop 1 (%141 is the column base, step 8).
140: ; preds = %83, %140
%141 = phi i32 [ 0, %83 ], [ %217, %140 ]
%142 = or i32 %141, %16, !dbg !39
%143 = zext nneg i32 %142 to i64, !dbg !39
; In-range mask col < 50257; it predicates every load and store below.
%144 = icmp ult i32 %142, 50257, !dbg !40
; Flat element index row*50257 + col, shared by %4, %0, %5 and %6.
%145 = add nsw i64 %40, %143, !dbg !41
%146 = add nsw i64 %41, %143, !dbg !41
%147 = add nsw i64 %42, %143, !dbg !41
%148 = add nsw i64 %43, %143, !dbg !41
; Input from %4: predicated 16-bit loads (0 when masked off), then bf16 -> f32.
%149 = getelementptr i16, ptr addrspace(1) %4, i64 %145, !dbg !42
%150 = getelementptr i16, ptr addrspace(1) %4, i64 %146, !dbg !42
%151 = getelementptr i16, ptr addrspace(1) %4, i64 %147, !dbg !42
%152 = getelementptr i16, ptr addrspace(1) %4, i64 %148, !dbg !42
%153 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %149, i1 %144, i16 0, i1 %144) #2, !dbg !43
%154 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %150, i1 %144, i16 0, i1 %144) #2, !dbg !43
%155 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %151, i1 %144, i16 0, i1 %144) #2, !dbg !43
%156 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %152, i1 %144, i16 0, i1 %144) #2, !dbg !43
%157 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %153) #2, !dbg !44
%158 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %154) #2, !dbg !44
%159 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %155) #2, !dbg !44
%160 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %156) #2, !dbg !44
; Input from %0: the same f32 elements loop 1 summed, re-read here with evict_first.
%161 = getelementptr float, ptr addrspace(1) %0, i64 %145, !dbg !45
%162 = getelementptr float, ptr addrspace(1) %0, i64 %146, !dbg !45
%163 = getelementptr float, ptr addrspace(1) %0, i64 %147, !dbg !45
%164 = getelementptr float, ptr addrspace(1) %0, i64 %148, !dbg !45
%165 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %161, i1 %144, i32 0, i1 %144) #2, !dbg !46
%166 = bitcast i32 %165 to float, !dbg !46
%167 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %162, i1 %144, i32 0, i1 %144) #2, !dbg !46
%168 = bitcast i32 %167 to float, !dbg !46
%169 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %163, i1 %144, i32 0, i1 %144) #2, !dbg !46
%170 = bitcast i32 %169 to float, !dbg !46
%171 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %164, i1 %144, i32 0, i1 %144) #2, !dbg !46
%172 = bitcast i32 %171 to float, !dbg !46
; Input from %5: predicated 16-bit loads, then bf16 -> f32.
%173 = getelementptr i16, ptr addrspace(1) %5, i64 %145, !dbg !47
%174 = getelementptr i16, ptr addrspace(1) %5, i64 %146, !dbg !47
%175 = getelementptr i16, ptr addrspace(1) %5, i64 %147, !dbg !47
%176 = getelementptr i16, ptr addrspace(1) %5, i64 %148, !dbg !47
%177 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %173, i1 %144, i16 0, i1 %144) #2, !dbg !48
%178 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %174, i1 %144, i16 0, i1 %144) #2, !dbg !48
%179 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %175, i1 %144, i16 0, i1 %144) #2, !dbg !48
%180 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %176, i1 %144, i16 0, i1 %144) #2, !dbg !48
%181 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %177) #2, !dbg !49
%182 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %178) #2, !dbg !49
%183 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %179) #2, !dbg !49
%184 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %180) #2, !dbg !49
; scale * %0[row, col]
%185 = fmul float %136, %166, !dbg !37
%186 = fmul float %137, %168, !dbg !37
%187 = fmul float %138, %170, !dbg !37
%188 = fmul float %139, %172, !dbg !37
; exp(x) evaluated as 2^(x * log2(e)): 0x3FF7154760000000 is ~1.442695 (log2(e) at float
; precision) and ex2.approx is the approximate base-2 exponential.
%189 = fmul float %181, 0x3FF7154760000000, !dbg !50
%190 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %189) #2, !dbg !50
%191 = fmul float %182, 0x3FF7154760000000, !dbg !50
%192 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %191) #2, !dbg !50
%193 = fmul float %183, 0x3FF7154760000000, !dbg !50
%194 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %193) #2, !dbg !50
%195 = fmul float %184, 0x3FF7154760000000, !dbg !50
%196 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %195) #2, !dbg !50
; rowsum * exp(%5 value)
%197 = fmul float %96, %190, !dbg !51
%198 = fmul float %109, %192, !dbg !51
%199 = fmul float %122, %194, !dbg !51
%200 = fmul float %135, %196, !dbg !51
; scale * %0 value - rowsum * exp(%5 value)
%201 = fsub float %185, %197, !dbg !52
%202 = fsub float %186, %198, !dbg !52
%203 = fsub float %187, %199, !dbg !52
%204 = fsub float %188, %200, !dbg !52
; ... plus the value read from %4
%205 = fadd float %157, %201, !dbg !53
%206 = fadd float %158, %202, !dbg !53
%207 = fadd float %159, %203, !dbg !53
%208 = fadd float %160, %204, !dbg !53
; Round to bf16 (cvt.rn) and store to %6 at the same flat index, only where the mask holds.
%209 = getelementptr i16, ptr addrspace(1) %6, i64 %145, !dbg !54
%210 = getelementptr i16, ptr addrspace(1) %6, i64 %146, !dbg !54
%211 = getelementptr i16, ptr addrspace(1) %6, i64 %147, !dbg !54
%212 = getelementptr i16, ptr addrspace(1) %6, i64 %148, !dbg !54
%213 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %205) #2, !dbg !55
%214 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %206) #2, !dbg !55
%215 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %207) #2, !dbg !55
%216 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %208) #2, !dbg !55
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %213, ptr addrspace(1) %209, i1 %144) #2, !dbg !55
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %214, ptr addrspace(1) %210, i1 %144) #2, !dbg !55
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %215, ptr addrspace(1) %211, i1 %144) #2, !dbg !55
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %216, ptr addrspace(1) %212, i1 %144) #2, !dbg !55
; Same step and exit condition as loop 1 (last column base is 50256).
%217 = add nuw nsw i32 %141, 8, !dbg !38
%218 = icmp ult i32 %141, 50249, !dbg !38
br i1 %218, label %140, label %219, !dbg !38
219: ; preds = %140
ret void, !dbg !56
}
; Intrinsic that yields the x component of the thread index; the kernel calls it once
; at entry to derive its row slot and column lane.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; Butterfly (XOR-lane) shuffle intrinsic; the kernel calls it at distances 4, 2 and 1
; to sum partial results across lanes.
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
; Attribute groups: #0 and #1 belong to the two intrinsic declarations above; #2 is the
; group attached to the inline-asm call sites inside the kernel.
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
attributes #2 = { nounwind }
; Named module metadata: module flags, the debug compile unit, and NVVM annotations.
; The annotation list names !3 and !4 twice each.
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!nvvm.annotations = !{!3, !4, !4, !3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
; Compile unit and the generated Python source file the debug locations refer to.
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "ckzgl7thb4xdfkfnd2tidks6mt5f3hauwfyjflbtzyepo5oxkvhk.py", directory: "/tmp/torchinductor_root/kz")
; NVVM annotations for the kernel symbol: "kernel" = 1 and "maxntidx" = 128.
!3 = !{ptr @triton__0d1d2d3d4d5d6d7de8, !"kernel", i32 1}
!4 = !{ptr @triton__0d1d2d3d4d5d6d7de8, !"maxntidx", i32 128}
; Subprogram record for the kernel (declared at line 18 of !2) with an empty type list.
!5 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6d7de8", linkageName: "triton__0d1d2d3d4d5d6d7de8", scope: !2, file: !2, line: 18, type: !6, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!6 = !DISubroutineType(cc: DW_CC_normal, types: !7)
!7 = !{}
; Source locations referenced through !dbg on instructions. !8-!28 are scoped directly
; to the kernel subprogram (!5).
!8 = !DILocation(line: 22, column: 44, scope: !5)
!9 = !DILocation(line: 24, column: 33, scope: !5)
!10 = !DILocation(line: 21, column: 28, scope: !5)
!11 = !DILocation(line: 21, column: 34, scope: !5)
!12 = !DILocation(line: 21, column: 46, scope: !5)
!13 = !DILocation(line: 22, column: 23, scope: !5)
!14 = !DILocation(line: 26, column: 30, scope: !5)
!15 = !DILocation(line: 26, column: 35, scope: !5)
!16 = !DILocation(line: 27, column: 19, scope: !5)
!17 = !DILocation(line: 29, column: 19, scope: !5)
!18 = !DILocation(line: 36, column: 46, scope: !5)
!19 = !DILocation(line: 38, column: 23, scope: !5)
!20 = !DILocation(line: 39, column: 22, scope: !5)
!21 = !DILocation(line: 41, column: 37, scope: !5)
!22 = !DILocation(line: 32, column: 36, scope: !5)
!23 = !DILocation(line: 33, column: 27, scope: !5)
!24 = !DILocation(line: 34, column: 25, scope: !5)
!25 = !DILocation(line: 36, column: 34, scope: !5)
!26 = !DILocation(line: 36, column: 52, scope: !5)
!27 = !DILocation(line: 42, column: 23, scope: !5)
!28 = !DILocation(line: 45, column: 40, scope: !5)
; !29-!36 describe code inlined from standard.py (!31) at line 46, column 27; they are
; the locations used by the shuffle/fadd reduction instructions.
!29 = !DILocation(line: 243, column: 36, scope: !30, inlinedAt: !32)
!30 = distinct !DILexicalBlockFile(scope: !5, file: !31, discriminator: 0)
!31 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
!32 = !DILocation(line: 46, column: 27, scope: !30)
!33 = !DILocation(line: 233, column: 15, scope: !34, inlinedAt: !35)
!34 = distinct !DILexicalBlockFile(scope: !30, file: !31, discriminator: 0)
!35 = !DILocation(line: 243, column: 36, scope: !34, inlinedAt: !36)
!36 = !DILocation(line: 46, column: 27, scope: !34)
; !37-!56 are scoped to the kernel subprogram again; they cover the second loop and the return.
!37 = !DILocation(line: 63, column: 24, scope: !5)
!38 = !DILocation(line: 51, column: 36, scope: !5)
!39 = !DILocation(line: 52, column: 27, scope: !5)
!40 = !DILocation(line: 53, column: 25, scope: !5)
!41 = !DILocation(line: 55, column: 41, scope: !5)
!42 = !DILocation(line: 55, column: 35, scope: !5)
!43 = !DILocation(line: 55, column: 53, scope: !5)
!44 = !DILocation(line: 55, column: 105, scope: !5)
!45 = !DILocation(line: 56, column: 35, scope: !5)
!46 = !DILocation(line: 56, column: 53, scope: !5)
!47 = !DILocation(line: 57, column: 35, scope: !5)
!48 = !DILocation(line: 57, column: 53, scope: !5)
!49 = !DILocation(line: 57, column: 105, scope: !5)
!50 = !DILocation(line: 65, column: 23, scope: !5)
!51 = !DILocation(line: 66, column: 24, scope: !5)
!52 = !DILocation(line: 67, column: 24, scope: !5)
!53 = !DILocation(line: 69, column: 24, scope: !5)
!54 = !DILocation(line: 70, column: 29, scope: !5)
!55 = !DILocation(line: 70, column: 54, scope: !5)
!56 = !DILocation(line: 51, column: 4, scope: !5)