|
|
|
; Name recorded for this module by the tool that produced it. |
source_filename = "LLVMDialectModule" |
|
|
|
; String constants handed to @__assertfail on the kernel's failure path |
; (function name, file name, and message, respectively). |
; Each array includes an explicit trailing \00 and its length counts that byte, |
; so a consumer that reads the pointer as a C string stops inside the constant |
; instead of running past its end. |
@assertFunc_0 = internal constant [26 x i8] c"_call_with_frames_removed\00" |
|
@assertFile_0 = internal constant [39 x i8] c"<frozen importlib._bootstrap_external>\00" |
|
@assertMessage_0 = internal constant [39 x i8] c"index out of bounds: 0 <= tmp7 < 50257\00" |
|
; Externally defined, zero-length byte array in addrspace(3). |
; The kernel below uses it as scratch space: each thread writes slots 2t and 2t+1 |
; and, after a barrier, reads slots t and t|128, so values move between threads. |
; The real size is not specified anywhere in this module. |
@global_smem = external local_unnamed_addr addrspace(3) global [0 x i8] |
|
|
|
; External assertion-failure handler. The kernel calls it with |
; (message ptr, file ptr, i32 883, function-name ptr, i64 1). |
; NOTE(review): presumably the CUDA device runtime's __assertfail, which reads |
; the three pointers as NUL-terminated strings - confirm against the runtime. |
declare void @__assertfail(ptr, ptr, i32, ptr, i64) local_unnamed_addr |
|
|
|
; Kernel entry point (listed as "kernel" with maxntidx = 128 in !nvvm.annotations). |
; |
; Arguments: |
;   %0  addrspace(1) pointer; read below as a flat array of i64 values. |
;   %1  addrspace(1) pointer; indexed below in float (4-byte) units and written with 32-bit stores. |
;   %2  i64; never referenced in the body. |
; |
; Outline, for thread t = tid.x & 127 and block base = sext(ctaid.x) << 9: |
;   1. Load four i64 values from %0 at element indices base + {2t, 2t+1, 2t+256, 2t+257}. |
;   2. Replace -1 by 0, add 50257 to any value that is then negative, and test each result for (unsigned) < 50257. |
;   3. Pass the four test results through @global_smem and call @__assertfail if any byte read back is zero. |
;   4. For each value v loaded from element index i, form the address %1 + (v + i*50257) floats, |
;      pass the addresses through @global_smem, and store the 32-bit constant 0xBF800000 |
;      (-1.0 as an IEEE-754 single) to the four addresses read back. |
; NOTE(review): the exchanges through @global_smem look like a redistribution of values across threads |
; chosen by the code generator - confirm against the generating source. |
define void @triton__0d1d2de(ptr addrspace(1) %0, ptr addrspace(1) %1, i64 %2) local_unnamed_addr !dbg !7 { |
|
; t = tid.x & 127; %6 = 2t, %7 = 2t+1, %8 = 2t+256. The ORs behave as adds because the bits they set are clear in 2t. |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10 |
|
%5 = and i32 %4, 127, !dbg !10 |
|
%6 = shl nuw nsw i32 %5, 1, !dbg !10 |
|
%7 = or i32 %6, 1, !dbg !10 |
|
%8 = or i32 %6, 256, !dbg !10 |
|
; Block index read via inline PTX, sign-extended, then shifted left by 9 (x512) to give the block's base element index. |
%9 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #2, !dbg !11 |
|
%10 = sext i32 %9 to i64, !dbg !12 |
|
%11 = shl nsw i64 %10, 9, !dbg !13 |
|
; Element indices base|2t (%14) and base|(2t+256) (%15), and the matching i64 element addresses in %0. |
%12 = zext nneg i32 %6 to i64 |
|
%13 = zext nneg i32 %8 to i64 |
|
%14 = or i64 %11, %12, !dbg !14 |
|
%15 = or i64 %11, %13, !dbg !14 |
|
%16 = getelementptr i64, ptr addrspace(1) %0, i64 %14, !dbg !15 |
|
%17 = getelementptr i64, ptr addrspace(1) %0, i64 %15, !dbg !15 |
|
; Two PTX vector loads of two consecutive i64 values each. The asm zeroes both outputs, |
; then performs ld.global.v2.b64 under predicate $3, which is the constant true here. |
%18 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.v2.b64 { $0, $1 }, [ $2 + 0 ];", "=l,=l,l,b"(ptr addrspace(1) %16, i1 true) #2, !dbg !16 |
|
%19 = extractvalue { i64, i64 } %18, 0, !dbg !16 |
|
%20 = extractvalue { i64, i64 } %18, 1, !dbg !16 |
|
%21 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.v2.b64 { $0, $1 }, [ $2 + 0 ];", "=l,=l,l,b"(ptr addrspace(1) %17, i1 true) #2, !dbg !16 |
|
%22 = extractvalue { i64, i64 } %21, 0, !dbg !16 |
|
%23 = extractvalue { i64, i64 } %21, 1, !dbg !16 |
|
; Pack the loaded values in reverse order: |
; lane 3 = index base+2t, lane 2 = base+2t+1, lane 1 = base+2t+256, lane 0 = base+2t+257. |
%24 = insertelement <4 x i64> poison, i64 %23, i64 0, !dbg !17 |
|
%25 = insertelement <4 x i64> %24, i64 %22, i64 1, !dbg !17 |
|
%26 = insertelement <4 x i64> %25, i64 %20, i64 2, !dbg !17 |
|
%27 = insertelement <4 x i64> %26, i64 %19, i64 3, !dbg !17 |
|
; -1 becomes 0 (%29); values that are negative after that get 50257 added (%32); |
; %33 is the per-lane range check %32 < 50257, compared as unsigned. |
%28 = icmp eq <4 x i64> %27, <i64 -1, i64 -1, i64 -1, i64 -1>, !dbg !17 |
|
%29 = select <4 x i1> %28, <4 x i64> zeroinitializer, <4 x i64> %27, !dbg !18 |
|
%30 = add <4 x i64> %29, <i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !19 |
|
%31 = icmp slt <4 x i64> %29, zeroinitializer, !dbg !20 |
|
%32 = select <4 x i1> %31, <4 x i64> %30, <4 x i64> %29, !dbg !21 |
|
%33 = icmp ult <4 x i64> %32, <i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !22 |
|
; Check exchange, round 1: write lanes 3 and 2 as bytes to @global_smem[2t] and [2t+1], |
; barrier, then read bytes [t] (%45) and [t|128] (%49). |
%34 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %12, !dbg !22 |
|
%35 = extractelement <4 x i1> %33, i64 3, !dbg !22 |
|
%36 = zext i1 %35 to i8, !dbg !22 |
|
%37 = insertelement <1 x i8> undef, i8 %36, i64 0, !dbg !22 |
|
store <1 x i8> %37, ptr addrspace(3) %34, align 1, !dbg !22 |
|
%38 = zext nneg i32 %7 to i64, !dbg !22 |
|
%39 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %38, !dbg !22 |
|
%40 = extractelement <4 x i1> %33, i64 2, !dbg !22 |
|
%41 = zext i1 %40 to i8, !dbg !22 |
|
%42 = insertelement <1 x i8> undef, i8 %41, i64 0, !dbg !22 |
|
store <1 x i8> %42, ptr addrspace(3) %39, align 1, !dbg !22 |
|
tail call void @llvm.nvvm.barrier0(), !dbg !22 |
|
%43 = zext nneg i32 %5 to i64, !dbg !22 |
|
%44 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %43, !dbg !22 |
|
%45 = load i8, ptr addrspace(3) %44, align 1, !dbg !22 |
|
%46 = or i32 %5, 128, !dbg !22 |
|
%47 = zext nneg i32 %46 to i64, !dbg !22 |
|
%48 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %47, !dbg !22 |
|
%49 = load i8, ptr addrspace(3) %48, align 1, !dbg !22 |
|
; Check exchange, round 2: barrier before the slots are overwritten, then lanes 1 and 0 |
; go through the same byte slots and are read back as %56 and %57. |
tail call void @llvm.nvvm.barrier0(), !dbg !22 |
|
%50 = extractelement <4 x i1> %33, i64 1, !dbg !22 |
|
%51 = zext i1 %50 to i8, !dbg !22 |
|
%52 = insertelement <1 x i8> undef, i8 %51, i64 0, !dbg !22 |
|
store <1 x i8> %52, ptr addrspace(3) %34, align 1, !dbg !22 |
|
%53 = extractelement <4 x i1> %33, i64 0, !dbg !22 |
|
%54 = zext i1 %53 to i8, !dbg !22 |
|
%55 = insertelement <1 x i8> undef, i8 %54, i64 0, !dbg !22 |
|
store <1 x i8> %55, ptr addrspace(3) %39, align 1, !dbg !22 |
|
tail call void @llvm.nvvm.barrier0(), !dbg !22 |
|
%56 = load i8, ptr addrspace(3) %44, align 1, !dbg !22 |
|
%57 = load i8, ptr addrspace(3) %48, align 1, !dbg !22 |
|
; Gather the four bytes read back. A zero byte is a failed range check, so %62 marks failures |
; and %.not is true only when there are none; in that case the assert block is skipped. |
%58 = insertelement <4 x i8> poison, i8 %49, i64 0, !dbg !22 |
|
%59 = insertelement <4 x i8> %58, i8 %45, i64 1, !dbg !22 |
|
%60 = insertelement <4 x i8> %59, i8 %56, i64 2, !dbg !22 |
|
%61 = insertelement <4 x i8> %60, i8 %57, i64 3, !dbg !22 |
|
%62 = icmp eq <4 x i8> %61, zeroinitializer, !dbg !22 |
|
%63 = bitcast <4 x i1> %62 to i4, !dbg !23 |
|
%.not = icmp eq i4 %63, 0, !dbg !23 |
|
br i1 %.not, label %65, label %64, !dbg !23 |
|
|
|
; Failure path: report through @__assertfail with the message, file and function strings, |
; i32 883 and i64 1, then continue into the store path below. |
64: |
|
tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 883, ptr nonnull @assertFunc_0, i64 1), !dbg !23 |
|
br label %65, !dbg !23 |
|
|
|
; Common path: finish the index set with base|(2t+257) (%68) and base|(2t+1) (%69), |
; then scale each of the four element indices by 50257. |
65: |
|
%66 = or i32 %6, 257, !dbg !10 |
|
%67 = zext nneg i32 %66 to i64 |
|
%68 = or i64 %11, %67, !dbg !14 |
|
%69 = or i64 %11, %38, !dbg !14 |
|
%70 = mul nsw i64 %14, 50257, !dbg !24 |
|
%71 = mul nsw i64 %69, 50257, !dbg !24 |
|
%72 = mul nsw i64 %15, 50257, !dbg !24 |
|
%73 = mul nsw i64 %68, 50257, !dbg !24 |
|
; Target addresses, one per lane: %1 + adjusted value (in floats) + index*50257 (in floats). |
; Each lane is paired with the element index its value was loaded from. |
%74 = extractelement <4 x i64> %32, i64 3, !dbg !25 |
|
%75 = getelementptr float, ptr addrspace(1) %1, i64 %74, !dbg !25 |
|
%76 = getelementptr float, ptr addrspace(1) %75, i64 %70, !dbg !25 |
|
%77 = extractelement <4 x i64> %32, i64 2, !dbg !25 |
|
%78 = getelementptr float, ptr addrspace(1) %1, i64 %77, !dbg !25 |
|
%79 = getelementptr float, ptr addrspace(1) %78, i64 %71, !dbg !25 |
|
%80 = extractelement <4 x i64> %32, i64 1, !dbg !25 |
|
%81 = getelementptr float, ptr addrspace(1) %1, i64 %80, !dbg !25 |
|
%82 = getelementptr float, ptr addrspace(1) %81, i64 %72, !dbg !25 |
|
%83 = extractelement <4 x i64> %32, i64 0, !dbg !25 |
|
%84 = getelementptr float, ptr addrspace(1) %1, i64 %83, !dbg !25 |
|
%85 = getelementptr float, ptr addrspace(1) %84, i64 %73, !dbg !25 |
|
; Address exchange, round 1: after a barrier, write the lane-3 and lane-2 addresses as i64 |
; to 8-byte slots 2t and 2t+1 of @global_smem, barrier, then read slots t (%93) and t|128 (%96). |
tail call void @llvm.nvvm.barrier0(), !dbg !26 |
|
%86 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %12, !dbg !26 |
|
%87 = ptrtoint ptr addrspace(1) %76 to i64, !dbg !26 |
|
%88 = insertelement <1 x i64> undef, i64 %87, i64 0, !dbg !26 |
|
store <1 x i64> %88, ptr addrspace(3) %86, align 8, !dbg !26 |
|
%89 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %38, !dbg !26 |
|
%90 = ptrtoint ptr addrspace(1) %79 to i64, !dbg !26 |
|
%91 = insertelement <1 x i64> undef, i64 %90, i64 0, !dbg !26 |
|
store <1 x i64> %91, ptr addrspace(3) %89, align 8, !dbg !26 |
|
tail call void @llvm.nvvm.barrier0(), !dbg !26 |
|
%92 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %43, !dbg !26 |
|
%93 = load i64, ptr addrspace(3) %92, align 8, !dbg !26 |
|
%94 = inttoptr i64 %93 to ptr addrspace(1), !dbg !26 |
|
%95 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %47, !dbg !26 |
|
%96 = load i64, ptr addrspace(3) %95, align 8, !dbg !26 |
|
%97 = inttoptr i64 %96 to ptr addrspace(1), !dbg !26 |
|
; Address exchange, round 2: barrier before reuse, then the lane-1 and lane-0 addresses |
; go through the same slots and are read back as %102 and %104. |
tail call void @llvm.nvvm.barrier0(), !dbg !26 |
|
%98 = ptrtoint ptr addrspace(1) %82 to i64, !dbg !26 |
|
%99 = insertelement <1 x i64> undef, i64 %98, i64 0, !dbg !26 |
|
store <1 x i64> %99, ptr addrspace(3) %86, align 8, !dbg !26 |
|
%100 = ptrtoint ptr addrspace(1) %85 to i64, !dbg !26 |
|
%101 = insertelement <1 x i64> undef, i64 %100, i64 0, !dbg !26 |
|
store <1 x i64> %101, ptr addrspace(3) %89, align 8, !dbg !26 |
|
tail call void @llvm.nvvm.barrier0(), !dbg !26 |
|
%102 = load i64, ptr addrspace(3) %92, align 8, !dbg !26 |
|
%103 = inttoptr i64 %102 to ptr addrspace(1), !dbg !26 |
|
%104 = load i64, ptr addrspace(3) %95, align 8, !dbg !26 |
|
%105 = inttoptr i64 %104 to ptr addrspace(1), !dbg !26 |
|
; Four PTX st.global.b32 stores under a constant-true predicate. The stored value |
; -1082130432 is the bit pattern 0xBF800000, i.e. -1.0 as an IEEE-754 single. |
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 -1082130432, ptr addrspace(1) %94, i1 true) #2, !dbg !26 |
|
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 -1082130432, ptr addrspace(1) %97, i1 true) #2, !dbg !26 |
|
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 -1082130432, ptr addrspace(1) %103, i1 true) #2, !dbg !26 |
|
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 -1082130432, ptr addrspace(1) %105, i1 true) #2, !dbg !26 |
|
ret void, !dbg !27 |
|
} |
|
|
|
|
|
; NVVM intrinsic that returns the PTX tid.x special register; the kernel masks the result with 127. |
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0 |
|
|
|
|
|
; NVVM barrier intrinsic; the kernel calls it between every write to and read from @global_smem. |
declare void @llvm.nvvm.barrier0() #1 |
|
|
|
; #0: attached to the tid.x intrinsic declaration (no memory access, speculatable). |
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
|
; #1: attached to the barrier intrinsic declaration; note that it is convergent. |
attributes #1 = { convergent nocallback nounwind } |
|
; #2: attached to every inline-asm call site in the kernel. |
attributes #2 = { nounwind } |
|
|
|
; Named module-level metadata: module flags, debug compile unit, NVVM annotations, producer ident. |
!llvm.module.flags = !{!0, !1} |
|
!llvm.dbg.cu = !{!2} |
|
; Entries !4 and !5 are each listed twice in this annotation list. |
!nvvm.annotations = !{!4, !5, !5, !4} |
|
!llvm.ident = !{!6} |
|
|
|
; Module flags: debug-info version 3 and nvvm-reflect-ftz = 1. |
!0 = !{i32 2, !"Debug Info Version", i32 3} |
|
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1} |
|
; Debug-info compile unit (producer "triton") and the source file that the line numbers below refer to. |
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) |
|
!3 = !DIFile(filename: "chlrkgpvvbdizdz7sllquet2j7zhtes6meh6kenrqxov26mswvw7.py", directory: "/tmp/torchinductor_root/hl") |
|
; Marks @triton__0d1d2de as a kernel and sets its maxntidx to 128. |
!4 = !{ptr @triton__0d1d2de, !"kernel", i32 1} |
|
!5 = !{ptr @triton__0d1d2de, !"maxntidx", i32 128} |
|
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} |
|
; Subprogram for the kernel (line 18 of !3); its subroutine type has an empty type list. |
!7 = distinct !DISubprogram(name: "triton__0d1d2de", linkageName: "triton__0d1d2de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) |
|
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9) |
|
!9 = !{} |
|
; Source locations (line and column in !3) attached to the kernel's instructions through !dbg. |
!10 = !DILocation(line: 21, column: 36, scope: !7) |
|
!11 = !DILocation(line: 20, column: 28, scope: !7) |
|
!12 = !DILocation(line: 20, column: 34, scope: !7) |
|
!13 = !DILocation(line: 20, column: 46, scope: !7) |
|
!14 = !DILocation(line: 21, column: 23, scope: !7) |
|
!15 = !DILocation(line: 24, column: 30, scope: !7) |
|
!16 = !DILocation(line: 24, column: 35, scope: !7) |
|
!17 = !DILocation(line: 26, column: 19, scope: !7) |
|
!18 = !DILocation(line: 28, column: 32, scope: !7) |
|
!19 = !DILocation(line: 29, column: 18, scope: !7) |
|
!20 = !DILocation(line: 30, column: 18, scope: !7) |
|
!21 = !DILocation(line: 31, column: 32, scope: !7) |
|
!22 = !DILocation(line: 32, column: 36, scope: !7) |
|
!23 = !DILocation(line: 32, column: 51, scope: !7) |
|
!24 = !DILocation(line: 34, column: 39, scope: !7) |
|
!25 = !DILocation(line: 34, column: 25, scope: !7) |
|
!26 = !DILocation(line: 34, column: 51, scope: !7) |
|
!27 = !DILocation(line: 34, column: 4, scope: !7) |
|
|