|
// |
|
// Generated by LLVM NVPTX Back-End |
|
// |
|
|
|
// PTX ISA version this module is written against. |
.version 8.2 |
|
// GPU architecture this module is compiled for. |
.target sm_89 |
|
// Addresses in this module are 64 bits wide. |
.address_size 64 |
|
|
|
// .globl triton__0d1d2d3d4d5d6e7de |
|
// Prototype of the external device function __assertfail (defined outside this module). |
// It takes five parameters and declares no return value. |
// The one call site visible in this file passes, in order: |
//   param_0 (b64) = generic address of assertMessage_0 |
//   param_1 (b64) = generic address of assertFile_0 |
//   param_2 (b32) = the constant 883 |
//   param_3 (b64) = generic address of assertFunc_0 |
//   param_4 (b64) = the constant 1 |
// NOTE(review): presumably message / file / line / function / character size, as in the |
// CUDA device-side assert interface -- confirm against the CUDA runtime documentation. |
.extern .func __assertfail |
|
( |
|
.param .b64 __assertfail_param_0, |
|
.param .b64 __assertfail_param_1, |
|
.param .b32 __assertfail_param_2, |
|
.param .b64 __assertfail_param_3, |
|
.param .b64 __assertfail_param_4 |
|
) |
|
; |
|
// Byte strings in global memory that the kernel hands to __assertfail. |
// Each array is sized to exactly its character count; none of the initializers |
// contains a trailing 0 byte. |
// NOTE(review): if __assertfail expects NUL-terminated strings, these rely on whatever |
// follows them in memory -- confirm with the generator (Triton / LLVM NVPTX). |
|
// ASCII: "_call_with_frames_removed" (25 bytes) |
.global .align 1 .b8 assertFunc_0[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100}; |
|
// ASCII: "<frozen importlib._bootstrap_external>" (38 bytes) |
.global .align 1 .b8 assertFile_0[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62}; |
|
// ASCII: "index out of bounds: 0 <= tmp11 < 50257" (39 bytes) |
.global .align 1 .b8 assertMessage_0[39] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 49, 49, 32, 60, 32, 53, 48, 50, 53, 55}; |
|
// Externally declared shared-memory byte array with no size given here. |
// The kernel in this file takes its address as the base for all of its |
// st.shared / ld.shared staging buffers. |
// NOTE(review): presumably sized by the dynamic shared-memory amount supplied at launch -- confirm. |
.extern .shared .align 1 .b8 global_smem[]; |
|
// ASCII: "__CUDA_FTZ" followed by a terminating 0 byte (11 bytes in total). |
// Not referenced by any instruction in the portion of the file reviewed here. |
.global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0}; |
|
|
|
.visible .entry triton__0d1d2d3d4d5d6e7de( |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_0, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_1, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_2, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_3, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_4, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_5, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_6, |
|
.param .u64 triton__0d1d2d3d4d5d6e7de_param_7 |
|
) |
|
.maxntid 256, 1, 1 |
|
{ |
|
.reg .pred %p<145>; |
|
.reg .b16 %rs<83>; |
|
.reg .b32 %r<247>; |
|
.reg .f32 %f<401>; |
|
.reg .b64 %rd<209>; |
|
.loc 1 18 0 |
|
$L__func_begin0: |
|
.loc 1 18 0 |
|
|
|
ld.param.u64 %rd40, [triton__0d1d2d3d4d5d6e7de_param_5]; |
|
ld.param.u64 %rd39, [triton__0d1d2d3d4d5d6e7de_param_4]; |
|
ld.param.u64 %rd49, [triton__0d1d2d3d4d5d6e7de_param_0]; |
|
ld.param.u64 %rd50, [triton__0d1d2d3d4d5d6e7de_param_1]; |
|
$L__tmp0: |
|
.loc 1 24 33 |
|
mov.u32 %r1, %tid.x; |
|
ld.param.u64 %rd51, [triton__0d1d2d3d4d5d6e7de_param_2]; |
|
and.b32 %r2, %r1, 255; |
|
ld.param.u64 %rd52, [triton__0d1d2d3d4d5d6e7de_param_3]; |
|
or.b32 %r36, %r2, 256; |
|
or.b32 %r37, %r2, 512; |
|
or.b32 %r38, %r2, 768; |
|
shl.b32 %r3, %r2, 2; |
|
or.b32 %r39, %r3, 1; |
|
or.b32 %r40, %r3, 2; |
|
or.b32 %r41, %r3, 3; |
|
.loc 1 21 28 |
|
mov.u32 %r34, %ctaid.x; |
|
.loc 1 21 34 |
|
cvt.s64.s32 %rd1, %r34; |
|
.loc 1 23 21 |
|
setp.lt.s32 %p1, %r34, 8; |
|
shl.b32 %r42, %r2, 3; |
|
shl.b32 %r43, %r2, 5; |
|
mov.u32 %r44, global_smem; |
|
add.s32 %r4, %r44, %r43; |
|
shl.b32 %r45, %r39, 1; |
|
shl.b32 %r46, %r39, 3; |
|
add.s32 %r5, %r44, %r46; |
|
shl.b32 %r47, %r40, 1; |
|
shl.b32 %r48, %r40, 3; |
|
add.s32 %r6, %r44, %r48; |
|
shl.b32 %r49, %r41, 1; |
|
shl.b32 %r50, %r41, 3; |
|
add.s32 %r7, %r44, %r50; |
|
shl.b32 %r51, %r2, 1; |
|
add.s32 %r12, %r44, %r42; |
|
shl.b32 %r52, %r36, 1; |
|
shl.b32 %r53, %r36, 3; |
|
add.s32 %r9, %r44, %r53; |
|
shl.b32 %r54, %r37, 1; |
|
shl.b32 %r55, %r37, 3; |
|
add.s32 %r10, %r44, %r55; |
|
shl.b32 %r56, %r38, 1; |
|
shl.b32 %r57, %r38, 3; |
|
add.s32 %r11, %r44, %r57; |
|
add.s32 %r13, %r44, %r45; |
|
add.s32 %r14, %r44, %r47; |
|
add.s32 %r15, %r44, %r49; |
|
add.s32 %r16, %r44, %r51; |
|
add.s32 %r17, %r44, %r52; |
|
add.s32 %r18, %r44, %r54; |
|
add.s32 %r19, %r44, %r56; |
|
add.s32 %r20, %r44, %r3; |
|
add.s32 %r21, %r44, %r2; |
|
shl.b32 %r58, %r2, 6; |
|
add.s32 %r22, %r44, %r58; |
|
shl.b32 %r59, %r39, 4; |
|
add.s32 %r23, %r44, %r59; |
|
shl.b32 %r60, %r40, 4; |
|
add.s32 %r24, %r44, %r60; |
|
shl.b32 %r61, %r41, 4; |
|
add.s32 %r25, %r44, %r61; |
|
shl.b32 %r62, %r2, 4; |
|
add.s32 %r26, %r44, %r62; |
|
shl.b32 %r63, %r36, 4; |
|
add.s32 %r27, %r44, %r63; |
|
shl.b32 %r64, %r37, 4; |
|
add.s32 %r28, %r44, %r64; |
|
shl.b32 %r65, %r38, 4; |
|
add.s32 %r29, %r44, %r65; |
|
.loc 1 28 36 |
|
mul.wide.s32 %rd53, %r34, 61440; |
|
mul.wide.u32 %rd54, %r2, 32; |
|
add.s64 %rd55, %rd53, %rd54; |
|
add.s64 %rd56, %rd55, %rd49; |
|
add.s64 %rd200, %rd56, 8208; |
|
mul.wide.s32 %rd57, %r34, 771947520; |
|
add.s64 %rd58, %rd50, %rd57; |
|
mul.wide.u32 %rd59, %r2, 402056; |
|
add.s64 %rd60, %rd58, %rd59; |
|
add.s64 %rd199, %rd60, 103227878; |
|
mul.wide.u32 %rd4, %r2, 16; |
|
mul.wide.s32 %rd61, %r34, 30720; |
|
add.s64 %rd198, %rd52, %rd61; |
|
add.s64 %rd197, %rd51, %rd61; |
|
mov.u64 %rd201, 0; |
|
mov.f32 %f385, 0f00000000; |
|
mov.b32 %r246, -2048; |
|
mov.u16 %rs44, 0; |
|
mov.f32 %f386, %f385; |
|
mov.f32 %f387, %f385; |
|
mov.f32 %f388, %f385; |
|
mov.f32 %f389, %f385; |
|
mov.f32 %f390, %f385; |
|
mov.f32 %f391, %f385; |
|
mov.f32 %f392, %f385; |
|
mov.u64 %rd202, %rd201; |
|
mov.u64 %rd203, %rd201; |
|
mov.u64 %rd204, %rd201; |
|
mov.u64 %rd205, %rd201; |
|
mov.u64 %rd206, %rd201; |
|
mov.u64 %rd207, %rd201; |
|
mov.u64 %rd208, %rd201; |
|
bra.uni $L__BB0_1; |
|
$L__BB0_19: |
|
.loc 1 0 0 |
|
add.s64 %rd206, %rd206, %rd81; |
|
add.s64 %rd205, %rd205, %rd80; |
|
add.s64 %rd208, %rd208, %rd79; |
|
add.s64 %rd207, %rd207, %rd78; |
|
add.s64 %rd202, %rd202, %rd85; |
|
add.s64 %rd201, %rd201, %rd84; |
|
add.s64 %rd204, %rd204, %rd83; |
|
add.s64 %rd203, %rd203, %rd82; |
|
.loc 1 36 23 |
|
bfe.s32 %r172, %r115, 0, 8; |
|
cvt.u16.u32 %rs67, %r172; |
|
and.b16 %rs68, %rs67, 255; |
|
setp.eq.s16 %p124, %rs68, 0; |
|
bfe.s32 %r173, %r115, 8, 8; |
|
cvt.u16.u32 %rs69, %r173; |
|
and.b16 %rs70, %rs69, 255; |
|
setp.eq.s16 %p125, %rs70, 0; |
|
bfe.s32 %r174, %r115, 16, 8; |
|
cvt.u16.u32 %rs71, %r174; |
|
and.b16 %rs72, %rs71, 255; |
|
setp.eq.s16 %p126, %rs72, 0; |
|
bfe.s32 %r175, %r115, 24, 8; |
|
cvt.u16.u32 %rs73, %r175; |
|
and.b16 %rs74, %rs73, 255; |
|
setp.eq.s16 %p127, %rs74, 0; |
|
bfe.s32 %r176, %r108, 0, 8; |
|
cvt.u16.u32 %rs75, %r176; |
|
and.b16 %rs76, %rs75, 255; |
|
setp.eq.s16 %p128, %rs76, 0; |
|
bfe.s32 %r177, %r108, 8, 8; |
|
cvt.u16.u32 %rs77, %r177; |
|
and.b16 %rs78, %rs77, 255; |
|
setp.eq.s16 %p129, %rs78, 0; |
|
bfe.s32 %r178, %r108, 16, 8; |
|
cvt.u16.u32 %rs79, %r178; |
|
and.b16 %rs80, %rs79, 255; |
|
setp.eq.s16 %p130, %rs80, 0; |
|
bfe.s32 %r179, %r108, 24, 8; |
|
cvt.u16.u32 %rs81, %r179; |
|
and.b16 %rs82, %rs81, 255; |
|
setp.eq.s16 %p131, %rs82, 0; |
|
.loc 1 50 23 |
|
setp.eq.f32 %p132, %f68, 0f00000000; |
|
selp.f32 %f320, 0fFF800000, %f400, %p132; |
|
bar.sync 0; |
|
st.shared.f32 [%r4], %f37; |
|
st.shared.f32 [%r5], %f42; |
|
st.shared.f32 [%r6], %f47; |
|
st.shared.f32 [%r7], %f52; |
|
bar.sync 0; |
|
ld.shared.f32 %f321, [%r12]; |
|
ld.shared.f32 %f322, [%r9]; |
|
ld.shared.f32 %f323, [%r10]; |
|
ld.shared.f32 %f324, [%r11]; |
|
bar.sync 0; |
|
st.shared.f32 [%r4], %f57; |
|
st.shared.f32 [%r5], %f62; |
|
st.shared.f32 [%r6], %f67; |
|
st.shared.f32 [%r7], %f320; |
|
bar.sync 0; |
|
ld.shared.f32 %f325, [%r12]; |
|
ld.shared.f32 %f326, [%r9]; |
|
ld.shared.f32 %f327, [%r10]; |
|
ld.shared.f32 %f328, [%r11]; |
|
.loc 1 54 17 |
|
sub.f32 %f329, %f324, %f28; |
|
sub.f32 %f330, %f323, %f27; |
|
sub.f32 %f331, %f322, %f26; |
|
sub.f32 %f332, %f321, %f25; |
|
sub.f32 %f333, %f328, %f32; |
|
sub.f32 %f334, %f327, %f31; |
|
sub.f32 %f335, %f326, %f30; |
|
sub.f32 %f336, %f325, %f29; |
|
add.f32 %f337, %f336, 0f00000000; |
|
add.f32 %f338, %f335, 0f00000000; |
|
add.f32 %f339, %f334, 0f00000000; |
|
add.f32 %f340, %f333, 0f00000000; |
|
add.f32 %f341, %f332, 0f00000000; |
|
add.f32 %f342, %f331, 0f00000000; |
|
add.f32 %f343, %f330, 0f00000000; |
|
add.f32 %f344, %f329, 0f00000000; |
|
.loc 1 56 38 |
|
selp.f32 %f345, 0f00000000, %f344, %p131; |
|
selp.f32 %f346, 0f00000000, %f343, %p130; |
|
selp.f32 %f347, 0f00000000, %f342, %p129; |
|
selp.f32 %f348, 0f00000000, %f341, %p128; |
|
selp.f32 %f349, 0f00000000, %f340, %p127; |
|
selp.f32 %f350, 0f00000000, %f339, %p126; |
|
selp.f32 %f351, 0f00000000, %f338, %p125; |
|
selp.f32 %f352, 0f00000000, %f337, %p124; |
|
.loc 1 59 48 |
|
selp.f32 %f353, %f352, 0f80000000, %p1; |
|
selp.f32 %f354, %f351, 0f80000000, %p1; |
|
selp.f32 %f355, %f350, 0f80000000, %p97; |
|
selp.f32 %f356, %f349, 0f80000000, %p97; |
|
selp.f32 %f357, %f348, 0f80000000, %p1; |
|
selp.f32 %f358, %f347, 0f80000000, %p1; |
|
selp.f32 %f359, %f346, 0f80000000, %p1; |
|
selp.f32 %f360, %f345, 0f80000000, %p1; |
|
add.f32 %f388, %f388, %f360; |
|
add.f32 %f387, %f387, %f359; |
|
add.f32 %f386, %f386, %f358; |
|
add.f32 %f385, %f385, %f357; |
|
add.f32 %f392, %f392, %f356; |
|
add.f32 %f391, %f391, %f355; |
|
add.f32 %f390, %f390, %f354; |
|
add.f32 %f389, %f389, %f353; |
|
.loc 1 28 36 |
|
add.s64 %rd200, %rd200, 16384; |
|
add.s32 %r246, %r246, 2048; |
|
add.s64 %rd199, %rd199, 205852672; |
|
add.s64 %rd198, %rd198, 8192; |
|
add.s64 %rd197, %rd197, 8192; |
|
setp.lt.u32 %p133, %r246, 5632; |
|
@%p133 bra $L__BB0_1; |
|
bra.uni $L__BB0_20; |
|
$L__BB0_1: |
|
.loc 1 0 36 |
|
cvt.u32.u64 %r98, %rd1; |
|
.loc 1 23 21 |
|
setp.lt.s32 %p85, %r98, 8; |
|
.loc 1 29 27 |
|
add.s32 %r99, %r246, 3584; |
|
add.s32 %r100, %r3, %r246; |
|
add.s32 %r101, %r100, 3072; |
|
.loc 1 30 25 |
|
setp.lt.u32 %p42, %r99, 7680; |
|
setp.lt.u32 %p43, %r101, 7680; |
|
.loc 1 29 27 |
|
add.s64 %rd64, %rd200, -8208; |
|
.loc 1 32 34 |
|
add.s64 %rd67, %rd200, -8192; |
|
add.s64 %rd70, %rd200, -16; |
|
.loc 1 32 59 |
|
and.pred %p97, %p85, %p42; |
|
and.pred %p16, %p85, %p43; |
|
.loc 1 32 51 |
|
mov.u64 %rd62, 0x0; |
|
mov.u64 %rd63, 0x0; |
|
@%p85 ld.global.L1::evict_first.v2.b64 { %rd62, %rd63 }, [ %rd64 + 0 ]; |
|
@!%p85 mov.u64 %rd62, 0x0; |
|
@!%p85 mov.u64 %rd63, 0x0; |
|
mov.u64 %rd65, 0x0; |
|
mov.u64 %rd66, 0x0; |
|
@%p85 ld.global.L1::evict_first.v2.b64 { %rd65, %rd66 }, [ %rd67 + 0 ]; |
|
@!%p85 mov.u64 %rd65, 0x0; |
|
@!%p85 mov.u64 %rd66, 0x0; |
|
mov.u64 %rd68, 0x0; |
|
mov.u64 %rd69, 0x0; |
|
@%p16 ld.global.L1::evict_first.v2.b64 { %rd68, %rd69 }, [ %rd70 + 0 ]; |
|
@!%p16 mov.u64 %rd68, 0x0; |
|
@!%p16 mov.u64 %rd69, 0x0; |
|
mov.u64 %rd71, 0x0; |
|
mov.u64 %rd72, 0x0; |
|
@%p16 ld.global.L1::evict_first.v2.b64 { %rd71, %rd72 }, [ %rd200 + 0 ]; |
|
@!%p16 mov.u64 %rd71, 0x0; |
|
@!%p16 mov.u64 %rd72, 0x0; |
|
.loc 1 33 35 |
|
add.s64 %rd74, %rd197, %rd4; |
|
.loc 1 33 52 |
|
add.s64 %rd75, %rd74, 4096; |
|
mov.b32 %r70, 0; |
|
mov.u32 %r66, 0x0; |
|
mov.u32 %r67, 0x0; |
|
mov.u32 %r68, 0x0; |
|
mov.u32 %r69, 0x0; |
|
@%p85 ld.global.L1::evict_first.v4.b32 { %r66, %r67, %r68, %r69 }, [ %rd74 + 0 ]; |
|
@!%p85 mov.u32 %r66, %r70; |
|
@!%p85 mov.u32 %r67, %r70; |
|
@!%p85 mov.u32 %r68, %r70; |
|
@!%p85 mov.u32 %r69, %r70; |
|
mov.u32 %r74, 0x0; |
|
mov.u32 %r75, 0x0; |
|
mov.u32 %r76, 0x0; |
|
mov.u32 %r77, 0x0; |
|
@%p16 ld.global.L1::evict_first.v4.b32 { %r74, %r75, %r76, %r77 }, [ %rd75 + 0 ]; |
|
@!%p16 mov.u32 %r74, %r70; |
|
@!%p16 mov.u32 %r75, %r70; |
|
@!%p16 mov.u32 %r76, %r70; |
|
@!%p16 mov.u32 %r77, %r70; |
|
bar.sync 0; |
|
st.shared.u32 [%r4], %r66; |
|
st.shared.u32 [%r5], %r67; |
|
st.shared.u32 [%r6], %r68; |
|
st.shared.u32 [%r7], %r69; |
|
bar.sync 0; |
|
ld.shared.f32 %f9, [%r12]; |
|
ld.shared.f32 %f10, [%r9]; |
|
ld.shared.f32 %f11, [%r10]; |
|
ld.shared.f32 %f12, [%r11]; |
|
bar.sync 0; |
|
st.shared.u32 [%r4], %r74; |
|
st.shared.u32 [%r5], %r75; |
|
st.shared.u32 [%r6], %r76; |
|
st.shared.u32 [%r7], %r77; |
|
bar.sync 0; |
|
ld.shared.f32 %f13, [%r12]; |
|
ld.shared.f32 %f14, [%r9]; |
|
ld.shared.f32 %f15, [%r10]; |
|
ld.shared.f32 %f16, [%r11]; |
|
.loc 1 34 35 |
|
add.s64 %rd76, %rd198, %rd4; |
|
.loc 1 34 52 |
|
add.s64 %rd77, %rd76, 4096; |
|
mov.u32 %r82, 0x0; |
|
mov.u32 %r83, 0x0; |
|
mov.u32 %r84, 0x0; |
|
mov.u32 %r85, 0x0; |
|
@%p85 ld.global.L1::evict_first.v4.b32 { %r82, %r83, %r84, %r85 }, [ %rd76 + 0 ]; |
|
@!%p85 mov.u32 %r82, %r70; |
|
@!%p85 mov.u32 %r83, %r70; |
|
@!%p85 mov.u32 %r84, %r70; |
|
@!%p85 mov.u32 %r85, %r70; |
|
mov.b32 %f17, %r82; |
|
mov.u32 %r90, 0x0; |
|
mov.u32 %r91, 0x0; |
|
mov.u32 %r92, 0x0; |
|
mov.u32 %r93, 0x0; |
|
@%p16 ld.global.L1::evict_first.v4.b32 { %r90, %r91, %r92, %r93 }, [ %rd77 + 0 ]; |
|
@!%p16 mov.u32 %r90, %r70; |
|
@!%p16 mov.u32 %r91, %r70; |
|
@!%p16 mov.u32 %r92, %r70; |
|
@!%p16 mov.u32 %r93, %r70; |
|
.loc 1 36 23 |
|
bar.sync 0; |
|
setp.ne.s64 %p44, %rd72, -1; |
|
setp.ne.s64 %p45, %rd71, -1; |
|
setp.ne.s64 %p46, %rd69, -1; |
|
setp.ne.s64 %p47, %rd68, -1; |
|
setp.ne.s64 %p48, %rd66, -1; |
|
setp.ne.s64 %p49, %rd65, -1; |
|
setp.ne.s64 %p50, %rd63, -1; |
|
setp.ne.s64 %p51, %rd62, -1; |
|
selp.u16 %rs1, 1, 0, %p51; |
|
st.shared.u8 [%r12], %rs1; |
|
selp.u16 %rs2, 1, 0, %p50; |
|
st.shared.u8 [%r13], %rs2; |
|
selp.u16 %rs3, 1, 0, %p49; |
|
st.shared.u8 [%r14], %rs3; |
|
selp.u16 %rs4, 1, 0, %p48; |
|
st.shared.u8 [%r15], %rs4; |
|
bar.sync 0; |
|
ld.shared.u8 %r102, [%r19]; |
|
ld.shared.u8 %r103, [%r18]; |
|
ld.shared.u8 %r104, [%r17]; |
|
ld.shared.u8 %r105, [%r16]; |
|
bar.sync 0; |
|
selp.u16 %rs5, 1, 0, %p47; |
|
st.shared.u8 [%r12], %rs5; |
|
selp.u16 %rs6, 1, 0, %p46; |
|
st.shared.u8 [%r13], %rs6; |
|
selp.u16 %rs7, 1, 0, %p45; |
|
st.shared.u8 [%r14], %rs7; |
|
selp.u16 %rs8, 1, 0, %p44; |
|
st.shared.u8 [%r15], %rs8; |
|
bar.sync 0; |
|
bfi.b32 %r106, %r104, %r105, 8, 8; |
|
bfi.b32 %r107, %r103, %r106, 16, 8; |
|
bfi.b32 %r108, %r102, %r107, 24, 8; |
|
ld.shared.u8 %r109, [%r16]; |
|
ld.shared.u8 %r110, [%r17]; |
|
bfi.b32 %r111, %r110, %r109, 8, 8; |
|
ld.shared.u8 %r112, [%r18]; |
|
bfi.b32 %r113, %r112, %r111, 16, 8; |
|
ld.shared.u8 %r114, [%r19]; |
|
bfi.b32 %r115, %r114, %r113, 24, 8; |
|
.loc 1 46 41 |
|
bar.sync 0; |
|
.loc 1 42 36 |
|
selp.b64 %rd86, %rd62, 0, %p51; |
|
selp.b64 %rd87, %rd63, 0, %p50; |
|
selp.b64 %rd88, %rd65, 0, %p49; |
|
selp.b64 %rd89, %rd66, 0, %p48; |
|
.loc 1 43 22 |
|
add.s64 %rd90, %rd89, 50257; |
|
add.s64 %rd91, %rd88, 50257; |
|
add.s64 %rd92, %rd87, 50257; |
|
add.s64 %rd93, %rd86, 50257; |
|
.loc 1 44 23 |
|
setp.lt.s64 %p60, %rd89, 0; |
|
setp.lt.s64 %p61, %rd88, 0; |
|
setp.lt.s64 %p62, %rd87, 0; |
|
setp.lt.s64 %p63, %rd86, 0; |
|
.loc 1 45 38 |
|
selp.b64 %rd27, %rd93, %rd86, %p63; |
|
selp.b64 %rd28, %rd92, %rd87, %p62; |
|
selp.b64 %rd29, %rd91, %rd88, %p61; |
|
selp.b64 %rd30, %rd90, %rd89, %p60; |
|
.loc 1 46 41 |
|
setp.lt.u64 %p64, %rd30, 50257; |
|
setp.lt.u64 %p65, %rd29, 50257; |
|
setp.lt.u64 %p66, %rd28, 50257; |
|
setp.lt.u64 %p67, %rd27, 50257; |
|
selp.u32 %r116, 1, 0, %p67; |
|
selp.u32 %r117, 1, 0, %p66; |
|
bfi.b32 %r118, %r117, %r116, 8, 8; |
|
selp.u32 %r119, 1, 0, %p65; |
|
bfi.b32 %r120, %r119, %r118, 16, 8; |
|
selp.u32 %r121, 1, 0, %p64; |
|
bfi.b32 %r122, %r121, %r120, 24, 8; |
|
st.shared.u32 [%r20], %r122; |
|
bar.sync 0; |
|
ld.shared.u8 %rs9, [%r21]; |
|
ld.shared.u8 %rs10, [%r21+256]; |
|
ld.shared.u8 %rs11, [%r21+512]; |
|
ld.shared.u8 %rs12, [%r21+768]; |
|
bar.sync 0; |
|
.loc 1 42 36 |
|
selp.b64 %rd94, %rd68, 0, %p47; |
|
selp.b64 %rd95, %rd69, 0, %p46; |
|
selp.b64 %rd96, %rd71, 0, %p45; |
|
selp.b64 %rd97, %rd72, 0, %p44; |
|
.loc 1 43 22 |
|
add.s64 %rd98, %rd97, 50257; |
|
add.s64 %rd99, %rd96, 50257; |
|
add.s64 %rd100, %rd95, 50257; |
|
add.s64 %rd101, %rd94, 50257; |
|
.loc 1 44 23 |
|
setp.lt.s64 %p68, %rd97, 0; |
|
setp.lt.s64 %p69, %rd96, 0; |
|
setp.lt.s64 %p70, %rd95, 0; |
|
setp.lt.s64 %p71, %rd94, 0; |
|
.loc 1 45 38 |
|
selp.b64 %rd31, %rd101, %rd94, %p71; |
|
selp.b64 %rd32, %rd100, %rd95, %p70; |
|
selp.b64 %rd33, %rd99, %rd96, %p69; |
|
selp.b64 %rd34, %rd98, %rd97, %p68; |
|
.loc 1 46 41 |
|
setp.lt.u64 %p72, %rd34, 50257; |
|
setp.lt.u64 %p73, %rd33, 50257; |
|
setp.lt.u64 %p74, %rd32, 50257; |
|
setp.lt.u64 %p75, %rd31, 50257; |
|
selp.u32 %r123, 1, 0, %p75; |
|
selp.u32 %r124, 1, 0, %p74; |
|
bfi.b32 %r125, %r124, %r123, 8, 8; |
|
selp.u32 %r126, 1, 0, %p73; |
|
bfi.b32 %r127, %r126, %r125, 16, 8; |
|
selp.u32 %r128, 1, 0, %p72; |
|
bfi.b32 %r129, %r128, %r127, 24, 8; |
|
st.shared.u32 [%r20], %r129; |
|
bar.sync 0; |
|
ld.shared.u8 %rs13, [%r21]; |
|
ld.shared.u8 %rs14, [%r21+256]; |
|
ld.shared.u8 %rs15, [%r21+512]; |
|
ld.shared.u8 %rs16, [%r21+768]; |
|
setp.eq.s16 %p76, %rs11, 0; |
|
selp.u16 %rs17, 1, 0, %p76; |
|
shl.b16 %rs18, %rs17, 2; |
|
setp.eq.s16 %p77, %rs12, 0; |
|
selp.u16 %rs19, -1, 0, %p77; |
|
shl.b16 %rs20, %rs19, 3; |
|
or.b16 %rs21, %rs20, %rs18; |
|
setp.eq.s16 %p78, %rs10, 0; |
|
selp.u16 %rs22, 1, 0, %p78; |
|
setp.eq.s16 %p79, %rs9, 0; |
|
selp.u16 %rs23, -1, 0, %p79; |
|
shl.b16 %rs24, %rs23, 1; |
|
or.b16 %rs25, %rs22, %rs24; |
|
and.b16 %rs26, %rs25, 3; |
|
or.b16 %rs27, %rs26, %rs21; |
|
and.b16 %rs28, %rs27, 15; |
|
setp.eq.s16 %p80, %rs15, 0; |
|
selp.u16 %rs29, 1, 0, %p80; |
|
shl.b16 %rs30, %rs29, 2; |
|
setp.eq.s16 %p81, %rs16, 0; |
|
selp.u16 %rs31, -1, 0, %p81; |
|
shl.b16 %rs32, %rs31, 3; |
|
or.b16 %rs33, %rs32, %rs30; |
|
setp.eq.s16 %p82, %rs13, 0; |
|
selp.u16 %rs34, 1, 0, %p82; |
|
setp.eq.s16 %p83, %rs14, 0; |
|
selp.u16 %rs35, -1, 0, %p83; |
|
shl.b16 %rs36, %rs35, 1; |
|
or.b16 %rs37, %rs34, %rs36; |
|
and.b16 %rs38, %rs37, 3; |
|
or.b16 %rs39, %rs38, %rs33; |
|
shl.b16 %rs40, %rs39, 4; |
|
or.b16 %rs41, %rs28, %rs40; |
|
.loc 1 46 57 |
|
and.b16 %rs42, %rs41, 255; |
|
setp.eq.s16 %p84, %rs42, 0; |
|
@%p84 bra $L__BB0_3; |
|
mov.u64 %rd102, assertMessage_0; |
|
cvta.global.u64 %rd103, %rd102; |
|
mov.u64 %rd104, assertFile_0; |
|
cvta.global.u64 %rd105, %rd104; |
|
mov.u64 %rd106, assertFunc_0; |
|
cvta.global.u64 %rd107, %rd106; |
|
mov.b32 %r130, 883; |
|
mov.u64 %rd108, 1; |
|
{ // callseq 0, 0 |
|
.reg .b32 temp_param_reg; |
|
.param .b64 param0; |
|
st.param.b64 [param0+0], %rd103; |
|
.param .b64 param1; |
|
st.param.b64 [param1+0], %rd105; |
|
.param .b32 param2; |
|
st.param.b32 [param2+0], %r130; |
|
.param .b64 param3; |
|
st.param.b64 [param3+0], %rd107; |
|
.param .b64 param4; |
|
st.param.b64 [param4+0], %rd108; |
|
call.uni |
|
__assertfail, |
|
( |
|
param0, |
|
param1, |
|
param2, |
|
param3, |
|
param4 |
|
); |
|
} // callseq 0 |
|
$L__BB0_3: |
|
.loc 1 47 73 |
|
bar.sync 0; |
|
shl.b64 %rd117, %rd27, 1; |
|
add.s64 %rd118, %rd199, %rd117; |
|
add.s64 %rd119, %rd118, -103227878; |
|
st.shared.u64 [%r22], %rd119; |
|
shl.b64 %rd120, %rd28, 1; |
|
add.s64 %rd121, %rd199, %rd120; |
|
add.s64 %rd122, %rd121, -103127364; |
|
st.shared.u64 [%r23], %rd122; |
|
shl.b64 %rd123, %rd29, 1; |
|
add.s64 %rd124, %rd199, %rd123; |
|
add.s64 %rd125, %rd124, -103026850; |
|
st.shared.u64 [%r24], %rd125; |
|
shl.b64 %rd126, %rd30, 1; |
|
add.s64 %rd127, %rd199, %rd126; |
|
add.s64 %rd128, %rd127, -102926336; |
|
st.shared.u64 [%r25], %rd128; |
|
bar.sync 0; |
|
ld.shared.u64 %rd109, [%r26]; |
|
ld.shared.u64 %rd110, [%r27]; |
|
ld.shared.u64 %rd111, [%r28]; |
|
ld.shared.u64 %rd112, [%r29]; |
|
bar.sync 0; |
|
shl.b64 %rd129, %rd31, 1; |
|
add.s64 %rd130, %rd199, %rd129; |
|
add.s64 %rd131, %rd130, -301542; |
|
st.shared.u64 [%r22], %rd131; |
|
shl.b64 %rd132, %rd32, 1; |
|
add.s64 %rd133, %rd199, %rd132; |
|
add.s64 %rd134, %rd133, -201028; |
|
st.shared.u64 [%r23], %rd134; |
|
shl.b64 %rd135, %rd33, 1; |
|
add.s64 %rd136, %rd199, %rd135; |
|
add.s64 %rd137, %rd136, -100514; |
|
st.shared.u64 [%r24], %rd137; |
|
shl.b64 %rd138, %rd34, 1; |
|
add.s64 %rd139, %rd199, %rd138; |
|
st.shared.u64 [%r25], %rd139; |
|
bar.sync 0; |
|
ld.shared.u64 %rd113, [%r26]; |
|
ld.shared.u64 %rd114, [%r27]; |
|
ld.shared.u64 %rd115, [%r28]; |
|
ld.shared.u64 %rd116, [%r29]; |
|
mov.u16 %rs43, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs43 }, [ %rd109 + 0 ]; |
|
@!%p85 mov.u16 %rs43, %rs44; |
|
mov.u16 %rs45, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs45 }, [ %rd110 + 0 ]; |
|
@!%p85 mov.u16 %rs45, %rs44; |
|
mov.u16 %rs47, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs47 }, [ %rd111 + 0 ]; |
|
@!%p85 mov.u16 %rs47, %rs44; |
|
mov.u16 %rs49, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs49 }, [ %rd112 + 0 ]; |
|
@!%p85 mov.u16 %rs49, %rs44; |
|
mov.u16 %rs51, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs51 }, [ %rd113 + 0 ]; |
|
@!%p85 mov.u16 %rs51, %rs44; |
|
mov.u16 %rs53, 0x0; |
|
@%p85 ld.global.L1::evict_last.b16 { %rs53 }, [ %rd114 + 0 ]; |
|
@!%p85 mov.u16 %rs53, %rs44; |
|
mov.u16 %rs55, 0x0; |
|
@%p97 ld.global.L1::evict_last.b16 { %rs55 }, [ %rd115 + 0 ]; |
|
@!%p97 mov.u16 %rs55, %rs44; |
|
mov.u16 %rs57, 0x0; |
|
@%p97 ld.global.L1::evict_last.b16 { %rs57 }, [ %rd116 + 0 ]; |
|
@!%p97 mov.u16 %rs57, %rs44; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p101, %f17, 0f00800000; |
|
mul.f32 %f96, %f17, 0f4B000000; |
|
selp.f32 %f33, %f96, %f17, %p101; |
|
selp.f32 %f97, 0fC1B80000, 0f00000000, %p101; |
|
mov.b32 %r140, %f33; |
|
add.s32 %r141, %r140, -1059760811; |
|
and.b32 %r142, %r141, -8388608; |
|
sub.s32 %r143, %r140, %r142; |
|
mov.b32 %f98, %r143; |
|
cvt.rn.f32.s32 %f99, %r142; |
|
mov.f32 %f100, 0f34000000; |
|
fma.rn.ftz.f32 %f101, %f99, %f100, %f97; |
|
add.f32 %f102, %f98, 0fBF800000; |
|
mov.f32 %f103, 0f3E1039F6; |
|
mov.f32 %f104, 0fBE055027; |
|
fma.rn.ftz.f32 %f105, %f104, %f102, %f103; |
|
mov.f32 %f106, 0fBDF8CDCC; |
|
fma.rn.ftz.f32 %f107, %f105, %f102, %f106; |
|
mov.f32 %f108, 0f3E0F2955; |
|
fma.rn.ftz.f32 %f109, %f107, %f102, %f108; |
|
mov.f32 %f110, 0fBE2AD8B9; |
|
fma.rn.ftz.f32 %f111, %f109, %f102, %f110; |
|
mov.f32 %f112, 0f3E4CED0B; |
|
fma.rn.ftz.f32 %f113, %f111, %f102, %f112; |
|
mov.f32 %f114, 0fBE7FFF22; |
|
fma.rn.ftz.f32 %f115, %f113, %f102, %f114; |
|
mov.f32 %f116, 0f3EAAAA78; |
|
fma.rn.ftz.f32 %f117, %f115, %f102, %f116; |
|
mov.f32 %f118, 0fBF000000; |
|
fma.rn.ftz.f32 %f119, %f117, %f102, %f118; |
|
mul.f32 %f120, %f102, %f119; |
|
fma.rn.ftz.f32 %f121, %f120, %f102, %f102; |
|
mov.f32 %f122, 0f3F317218; |
|
fma.rn.ftz.f32 %f393, %f101, %f122, %f121; |
|
setp.lt.u32 %p102, %r140, 2139095040; |
|
mov.f32 %f123, 0f7F800000; |
|
@%p102 bra $L__BB0_5; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f393, %f33, %f123, %f123; |
|
$L__BB0_5: |
|
mov.b32 %f18, %r83; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p104, %f18, 0f00800000; |
|
mul.f32 %f124, %f18, 0f4B000000; |
|
selp.f32 %f38, %f124, %f18, %p104; |
|
selp.f32 %f125, 0fC1B80000, 0f00000000, %p104; |
|
mov.b32 %r144, %f38; |
|
add.s32 %r145, %r144, -1059760811; |
|
and.b32 %r146, %r145, -8388608; |
|
sub.s32 %r147, %r144, %r146; |
|
mov.b32 %f126, %r147; |
|
cvt.rn.f32.s32 %f127, %r146; |
|
fma.rn.ftz.f32 %f129, %f127, %f100, %f125; |
|
add.f32 %f130, %f126, 0fBF800000; |
|
fma.rn.ftz.f32 %f133, %f104, %f130, %f103; |
|
fma.rn.ftz.f32 %f135, %f133, %f130, %f106; |
|
fma.rn.ftz.f32 %f137, %f135, %f130, %f108; |
|
fma.rn.ftz.f32 %f139, %f137, %f130, %f110; |
|
fma.rn.ftz.f32 %f141, %f139, %f130, %f112; |
|
fma.rn.ftz.f32 %f143, %f141, %f130, %f114; |
|
fma.rn.ftz.f32 %f145, %f143, %f130, %f116; |
|
fma.rn.ftz.f32 %f147, %f145, %f130, %f118; |
|
mul.f32 %f148, %f130, %f147; |
|
fma.rn.ftz.f32 %f149, %f148, %f130, %f130; |
|
fma.rn.ftz.f32 %f394, %f129, %f122, %f149; |
|
setp.lt.u32 %p105, %r144, 2139095040; |
|
@%p105 bra $L__BB0_7; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f394, %f38, %f123, %f123; |
|
$L__BB0_7: |
|
mov.b32 %f19, %r84; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p107, %f19, 0f00800000; |
|
mul.f32 %f152, %f19, 0f4B000000; |
|
selp.f32 %f43, %f152, %f19, %p107; |
|
selp.f32 %f153, 0fC1B80000, 0f00000000, %p107; |
|
mov.b32 %r148, %f43; |
|
add.s32 %r149, %r148, -1059760811; |
|
and.b32 %r150, %r149, -8388608; |
|
sub.s32 %r151, %r148, %r150; |
|
mov.b32 %f154, %r151; |
|
cvt.rn.f32.s32 %f155, %r150; |
|
fma.rn.ftz.f32 %f157, %f155, %f100, %f153; |
|
add.f32 %f158, %f154, 0fBF800000; |
|
fma.rn.ftz.f32 %f161, %f104, %f158, %f103; |
|
fma.rn.ftz.f32 %f163, %f161, %f158, %f106; |
|
fma.rn.ftz.f32 %f165, %f163, %f158, %f108; |
|
fma.rn.ftz.f32 %f167, %f165, %f158, %f110; |
|
fma.rn.ftz.f32 %f169, %f167, %f158, %f112; |
|
fma.rn.ftz.f32 %f171, %f169, %f158, %f114; |
|
fma.rn.ftz.f32 %f173, %f171, %f158, %f116; |
|
fma.rn.ftz.f32 %f175, %f173, %f158, %f118; |
|
mul.f32 %f176, %f158, %f175; |
|
fma.rn.ftz.f32 %f177, %f176, %f158, %f158; |
|
fma.rn.ftz.f32 %f395, %f157, %f122, %f177; |
|
setp.lt.u32 %p108, %r148, 2139095040; |
|
@%p108 bra $L__BB0_9; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f395, %f43, %f123, %f123; |
|
$L__BB0_9: |
|
mov.b32 %f20, %r85; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p110, %f20, 0f00800000; |
|
mul.f32 %f180, %f20, 0f4B000000; |
|
selp.f32 %f48, %f180, %f20, %p110; |
|
selp.f32 %f181, 0fC1B80000, 0f00000000, %p110; |
|
mov.b32 %r152, %f48; |
|
add.s32 %r153, %r152, -1059760811; |
|
and.b32 %r154, %r153, -8388608; |
|
sub.s32 %r155, %r152, %r154; |
|
mov.b32 %f182, %r155; |
|
cvt.rn.f32.s32 %f183, %r154; |
|
fma.rn.ftz.f32 %f185, %f183, %f100, %f181; |
|
add.f32 %f186, %f182, 0fBF800000; |
|
fma.rn.ftz.f32 %f189, %f104, %f186, %f103; |
|
fma.rn.ftz.f32 %f191, %f189, %f186, %f106; |
|
fma.rn.ftz.f32 %f193, %f191, %f186, %f108; |
|
fma.rn.ftz.f32 %f195, %f193, %f186, %f110; |
|
fma.rn.ftz.f32 %f197, %f195, %f186, %f112; |
|
fma.rn.ftz.f32 %f199, %f197, %f186, %f114; |
|
fma.rn.ftz.f32 %f201, %f199, %f186, %f116; |
|
fma.rn.ftz.f32 %f203, %f201, %f186, %f118; |
|
mul.f32 %f204, %f186, %f203; |
|
fma.rn.ftz.f32 %f205, %f204, %f186, %f186; |
|
fma.rn.ftz.f32 %f396, %f185, %f122, %f205; |
|
setp.lt.u32 %p111, %r152, 2139095040; |
|
@%p111 bra $L__BB0_11; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f396, %f48, %f123, %f123; |
|
$L__BB0_11: |
|
mov.b32 %f21, %r90; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p113, %f21, 0f00800000; |
|
mul.f32 %f208, %f21, 0f4B000000; |
|
selp.f32 %f53, %f208, %f21, %p113; |
|
selp.f32 %f209, 0fC1B80000, 0f00000000, %p113; |
|
mov.b32 %r156, %f53; |
|
add.s32 %r157, %r156, -1059760811; |
|
and.b32 %r158, %r157, -8388608; |
|
sub.s32 %r159, %r156, %r158; |
|
mov.b32 %f210, %r159; |
|
cvt.rn.f32.s32 %f211, %r158; |
|
fma.rn.ftz.f32 %f213, %f211, %f100, %f209; |
|
add.f32 %f214, %f210, 0fBF800000; |
|
fma.rn.ftz.f32 %f217, %f104, %f214, %f103; |
|
fma.rn.ftz.f32 %f219, %f217, %f214, %f106; |
|
fma.rn.ftz.f32 %f221, %f219, %f214, %f108; |
|
fma.rn.ftz.f32 %f223, %f221, %f214, %f110; |
|
fma.rn.ftz.f32 %f225, %f223, %f214, %f112; |
|
fma.rn.ftz.f32 %f227, %f225, %f214, %f114; |
|
fma.rn.ftz.f32 %f229, %f227, %f214, %f116; |
|
fma.rn.ftz.f32 %f231, %f229, %f214, %f118; |
|
mul.f32 %f232, %f214, %f231; |
|
fma.rn.ftz.f32 %f233, %f232, %f214, %f214; |
|
fma.rn.ftz.f32 %f397, %f213, %f122, %f233; |
|
setp.lt.u32 %p114, %r156, 2139095040; |
|
@%p114 bra $L__BB0_13; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f397, %f53, %f123, %f123; |
|
$L__BB0_13: |
|
mov.b32 %f22, %r91; |
|
.loc 1 50 23 |
|
setp.lt.f32 %p116, %f22, 0f00800000; |
|
mul.f32 %f236, %f22, 0f4B000000; |
|
selp.f32 %f58, %f236, %f22, %p116; |
|
selp.f32 %f237, 0fC1B80000, 0f00000000, %p116; |
|
mov.b32 %r160, %f58; |
|
add.s32 %r161, %r160, -1059760811; |
|
and.b32 %r162, %r161, -8388608; |
|
sub.s32 %r163, %r160, %r162; |
|
mov.b32 %f238, %r163; |
|
cvt.rn.f32.s32 %f239, %r162; |
|
fma.rn.ftz.f32 %f241, %f239, %f100, %f237; |
|
add.f32 %f242, %f238, 0fBF800000; |
|
fma.rn.ftz.f32 %f245, %f104, %f242, %f103; |
|
fma.rn.ftz.f32 %f247, %f245, %f242, %f106; |
|
fma.rn.ftz.f32 %f249, %f247, %f242, %f108; |
|
fma.rn.ftz.f32 %f251, %f249, %f242, %f110; |
|
fma.rn.ftz.f32 %f253, %f251, %f242, %f112; |
|
fma.rn.ftz.f32 %f255, %f253, %f242, %f114; |
|
fma.rn.ftz.f32 %f257, %f255, %f242, %f116; |
|
fma.rn.ftz.f32 %f259, %f257, %f242, %f118; |
|
mul.f32 %f260, %f242, %f259; |
|
fma.rn.ftz.f32 %f261, %f260, %f242, %f242; |
|
fma.rn.ftz.f32 %f398, %f241, %f122, %f261; |
|
setp.lt.u32 %p117, %r160, 2139095040; |
|
@%p117 bra $L__BB0_15; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f398, %f58, %f123, %f123; |
|
$L__BB0_15: |
|
and.pred %p52, %p16, %p46; |
|
and.pred %p53, %p16, %p47; |
|
and.pred %p54, %p16, %p44; |
|
and.pred %p55, %p16, %p45; |
|
and.pred %p56, %p1, %p50; |
|
and.pred %p57, %p1, %p51; |
|
and.pred %p58, %p1, %p48; |
|
and.pred %p59, %p1, %p49; |
|
setp.eq.f32 %p103, %f33, 0f00000000; |
|
setp.eq.f32 %p106, %f38, 0f00000000; |
|
setp.eq.f32 %p109, %f43, 0f00000000; |
|
setp.eq.f32 %p112, %f48, 0f00000000; |
|
setp.eq.f32 %p115, %f53, 0f00000000; |
|
mov.b32 %f23, %r92; |
|
.loc 1 50 23 |
|
setp.eq.f32 %p118, %f58, 0f00000000; |
|
setp.lt.f32 %p119, %f23, 0f00800000; |
|
mul.f32 %f264, %f23, 0f4B000000; |
|
selp.f32 %f63, %f264, %f23, %p119; |
|
selp.f32 %f265, 0fC1B80000, 0f00000000, %p119; |
|
mov.b32 %r164, %f63; |
|
add.s32 %r165, %r164, -1059760811; |
|
and.b32 %r166, %r165, -8388608; |
|
sub.s32 %r167, %r164, %r166; |
|
mov.b32 %f266, %r167; |
|
cvt.rn.f32.s32 %f267, %r166; |
|
fma.rn.ftz.f32 %f269, %f267, %f100, %f265; |
|
add.f32 %f270, %f266, 0fBF800000; |
|
fma.rn.ftz.f32 %f273, %f104, %f270, %f103; |
|
fma.rn.ftz.f32 %f275, %f273, %f270, %f106; |
|
fma.rn.ftz.f32 %f277, %f275, %f270, %f108; |
|
fma.rn.ftz.f32 %f279, %f277, %f270, %f110; |
|
fma.rn.ftz.f32 %f281, %f279, %f270, %f112; |
|
fma.rn.ftz.f32 %f283, %f281, %f270, %f114; |
|
fma.rn.ftz.f32 %f285, %f283, %f270, %f116; |
|
fma.rn.ftz.f32 %f287, %f285, %f270, %f118; |
|
mul.f32 %f288, %f270, %f287; |
|
fma.rn.ftz.f32 %f289, %f288, %f270, %f270; |
|
fma.rn.ftz.f32 %f399, %f269, %f122, %f289; |
|
setp.lt.u32 %p120, %r164, 2139095040; |
|
@%p120 bra $L__BB0_17; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f399, %f63, %f123, %f123; |
|
$L__BB0_17: |
|
selp.u64 %rd78, 1, 0, %p55; |
|
selp.u64 %rd79, 1, 0, %p54; |
|
selp.u64 %rd80, 1, 0, %p53; |
|
selp.u64 %rd81, 1, 0, %p52; |
|
selp.u64 %rd82, 1, 0, %p59; |
|
selp.u64 %rd83, 1, 0, %p58; |
|
selp.u64 %rd84, 1, 0, %p57; |
|
selp.u64 %rd85, 1, 0, %p56; |
|
mov.b32 %f24, %r93; |
|
cvt.f32.bf16 %r131, %rs43; |
|
mov.b32 %f88, %r131; |
|
cvt.f32.bf16 %r132, %rs45; |
|
mov.b32 %f89, %r132; |
|
cvt.f32.bf16 %r133, %rs47; |
|
mov.b32 %f90, %r133; |
|
cvt.f32.bf16 %r134, %rs49; |
|
mov.b32 %f91, %r134; |
|
cvt.f32.bf16 %r135, %rs51; |
|
mov.b32 %f92, %r135; |
|
cvt.f32.bf16 %r136, %rs53; |
|
mov.b32 %f93, %r136; |
|
cvt.f32.bf16 %r137, %rs55; |
|
mov.b32 %f94, %r137; |
|
cvt.f32.bf16 %r138, %rs57; |
|
mov.b32 %f95, %r138; |
|
sub.f32 %f32, %f95, %f16; |
|
sub.f32 %f31, %f94, %f15; |
|
sub.f32 %f30, %f93, %f14; |
|
sub.f32 %f29, %f92, %f13; |
|
sub.f32 %f28, %f91, %f12; |
|
sub.f32 %f27, %f90, %f11; |
|
sub.f32 %f26, %f89, %f10; |
|
sub.f32 %f25, %f88, %f9; |
|
.loc 1 50 23 |
|
selp.f32 %f37, 0fFF800000, %f393, %p103; |
|
selp.f32 %f42, 0fFF800000, %f394, %p106; |
|
selp.f32 %f47, 0fFF800000, %f395, %p109; |
|
selp.f32 %f52, 0fFF800000, %f396, %p112; |
|
selp.f32 %f57, 0fFF800000, %f397, %p115; |
|
selp.f32 %f62, 0fFF800000, %f398, %p118; |
|
setp.eq.f32 %p121, %f63, 0f00000000; |
|
selp.f32 %f67, 0fFF800000, %f399, %p121; |
|
setp.lt.f32 %p122, %f24, 0f00800000; |
|
mul.f32 %f292, %f24, 0f4B000000; |
|
selp.f32 %f68, %f292, %f24, %p122; |
|
selp.f32 %f293, 0fC1B80000, 0f00000000, %p122; |
|
mov.b32 %r168, %f68; |
|
add.s32 %r169, %r168, -1059760811; |
|
and.b32 %r170, %r169, -8388608; |
|
sub.s32 %r171, %r168, %r170; |
|
mov.b32 %f294, %r171; |
|
cvt.rn.f32.s32 %f295, %r170; |
|
fma.rn.ftz.f32 %f297, %f295, %f100, %f293; |
|
add.f32 %f298, %f294, 0fBF800000; |
|
fma.rn.ftz.f32 %f301, %f104, %f298, %f103; |
|
fma.rn.ftz.f32 %f303, %f301, %f298, %f106; |
|
fma.rn.ftz.f32 %f305, %f303, %f298, %f108; |
|
fma.rn.ftz.f32 %f307, %f305, %f298, %f110; |
|
fma.rn.ftz.f32 %f309, %f307, %f298, %f112; |
|
fma.rn.ftz.f32 %f311, %f309, %f298, %f114; |
|
fma.rn.ftz.f32 %f313, %f311, %f298, %f116; |
|
fma.rn.ftz.f32 %f315, %f313, %f298, %f118; |
|
mul.f32 %f316, %f298, %f315; |
|
fma.rn.ftz.f32 %f317, %f316, %f298, %f298; |
|
fma.rn.ftz.f32 %f400, %f297, %f122, %f317; |
|
setp.lt.u32 %p123, %r168, 2139095040; |
|
@%p123 bra $L__BB0_19; |
|
.loc 1 0 23 |
|
fma.rn.ftz.f32 %f400, %f68, %f123, %f123; |
|
bra.uni $L__BB0_19; |
|
$L__BB0_20: |
|
.loc 1 24 33 |
|
bfe.u32 %r191, %r1, 5, 3; |
|
and.b32 %r192, %r1, 31; |
|
$L__tmp1: |
|
.loc 2 243 36 |
|
bar.sync 0; |
|
$L__tmp2: |
|
.loc 2 233 15 |
|
add.s64 %rd146, %rd201, %rd202; |
|
add.s64 %rd147, %rd146, %rd203; |
|
add.s64 %rd148, %rd147, %rd204; |
|
add.s64 %rd149, %rd148, %rd205; |
|
add.s64 %rd150, %rd149, %rd206; |
|
add.s64 %rd151, %rd150, %rd207; |
|
add.s64 %rd152, %rd151, %rd208; |
|
$L__tmp3: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r193, %rd152; |
|
shfl.sync.bfly.b32 %r194, %r193, 16, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r195}, %rd152; } |
|
shfl.sync.bfly.b32 %r196, %r195, 16, 31, -1; |
|
cvt.u64.u32 %rd153, %r194; |
|
cvt.u64.u32 %rd154, %r196; |
|
shl.b64 %rd155, %rd154, 32; |
|
or.b64 %rd156, %rd153, %rd155; |
|
$L__tmp4: |
|
.loc 2 233 15 |
|
add.s64 %rd157, %rd152, %rd156; |
|
$L__tmp5: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r197, %rd157; |
|
shfl.sync.bfly.b32 %r198, %r197, 8, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r199}, %rd157; } |
|
shfl.sync.bfly.b32 %r200, %r199, 8, 31, -1; |
|
cvt.u64.u32 %rd158, %r198; |
|
cvt.u64.u32 %rd159, %r200; |
|
shl.b64 %rd160, %rd159, 32; |
|
or.b64 %rd161, %rd158, %rd160; |
|
$L__tmp6: |
|
.loc 2 233 15 |
|
add.s64 %rd162, %rd157, %rd161; |
|
$L__tmp7: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r201, %rd162; |
|
shfl.sync.bfly.b32 %r202, %r201, 4, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r203}, %rd162; } |
|
shfl.sync.bfly.b32 %r204, %r203, 4, 31, -1; |
|
cvt.u64.u32 %rd163, %r202; |
|
cvt.u64.u32 %rd164, %r204; |
|
shl.b64 %rd165, %rd164, 32; |
|
or.b64 %rd166, %rd163, %rd165; |
|
$L__tmp8: |
|
.loc 2 233 15 |
|
add.s64 %rd167, %rd162, %rd166; |
|
$L__tmp9: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r205, %rd167; |
|
shfl.sync.bfly.b32 %r206, %r205, 2, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r207}, %rd167; } |
|
shfl.sync.bfly.b32 %r208, %r207, 2, 31, -1; |
|
cvt.u64.u32 %rd168, %r206; |
|
cvt.u64.u32 %rd169, %r208; |
|
shl.b64 %rd170, %rd169, 32; |
|
or.b64 %rd171, %rd168, %rd170; |
|
$L__tmp10: |
|
.loc 2 233 15 |
|
add.s64 %rd172, %rd167, %rd171; |
|
$L__tmp11: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r209, %rd172; |
|
shfl.sync.bfly.b32 %r210, %r209, 1, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r211}, %rd172; } |
|
shfl.sync.bfly.b32 %r212, %r211, 1, 31, -1; |
|
cvt.u64.u32 %rd173, %r210; |
|
cvt.u64.u32 %rd174, %r212; |
|
shl.b64 %rd175, %rd174, 32; |
|
or.b64 %rd176, %rd173, %rd175; |
|
$L__tmp12: |
|
.loc 2 233 15 |
|
add.s64 %rd140, %rd172, %rd176; |
|
$L__tmp13: |
|
.loc 2 243 36 |
|
setp.eq.s32 %p134, %r192, 0; |
|
shl.b32 %r213, %r191, 3; |
|
add.s32 %r180, %r44, %r213; |
|
@%p134 st.shared.b64 [ %r180 + 0 ], %rd140; |
|
bar.sync 0; |
|
setp.lt.s32 %p135, %r1, 8; |
|
shl.b32 %r215, %r1, 3; |
|
add.s32 %r181, %r44, %r215; |
|
@%p135 ld.shared.b64 %rd141, [ %r181 + 0 ]; |
|
cvt.u32.u64 %r216, %rd141; |
|
shfl.sync.bfly.b32 %r217, %r216, 4, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r218}, %rd141; } |
|
shfl.sync.bfly.b32 %r219, %r218, 4, 31, -1; |
|
cvt.u64.u32 %rd177, %r217; |
|
cvt.u64.u32 %rd178, %r219; |
|
shl.b64 %rd179, %rd178, 32; |
|
or.b64 %rd180, %rd177, %rd179; |
|
$L__tmp14: |
|
.loc 2 233 15 |
|
add.s64 %rd181, %rd141, %rd180; |
|
$L__tmp15: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r220, %rd181; |
|
shfl.sync.bfly.b32 %r221, %r220, 2, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r222}, %rd181; } |
|
shfl.sync.bfly.b32 %r223, %r222, 2, 31, -1; |
|
cvt.u64.u32 %rd182, %r221; |
|
cvt.u64.u32 %rd183, %r223; |
|
shl.b64 %rd184, %rd183, 32; |
|
or.b64 %rd185, %rd182, %rd184; |
|
$L__tmp16: |
|
.loc 2 233 15 |
|
add.s64 %rd186, %rd181, %rd185; |
|
$L__tmp17: |
|
.loc 2 243 36 |
|
cvt.u32.u64 %r224, %rd186; |
|
shfl.sync.bfly.b32 %r225, %r224, 1, 31, -1; |
|
{ .reg .b32 tmp; mov.b64 {tmp, %r226}, %rd186; } |
|
shfl.sync.bfly.b32 %r227, %r226, 1, 31, -1; |
|
cvt.u64.u32 %rd187, %r225; |
|
cvt.u64.u32 %rd188, %r227; |
|
shl.b64 %rd189, %rd188, 32; |
|
or.b64 %rd190, %rd187, %rd189; |
|
$L__tmp18: |
|
.loc 2 233 15 |
|
add.s64 %rd142, %rd186, %rd190; |
|
$L__tmp19: |
|
.loc 2 243 36 |
|
and.b32 %r228, %r1, 7; |
|
setp.eq.s32 %p143, %r228, 0; |
|
and.pred %p136, %p135, %p143; |
|
@%p136 st.shared.b64 [ %r181 + 0 ], %rd142; |
|
bar.sync 0; |
|
ld.shared.u32 %rd191, [global_smem+4]; |
|
shl.b64 %rd192, %rd191, 32; |
|
ld.shared.u32 %rd193, [global_smem]; |
|
or.b64 %rd194, %rd192, %rd193; |
|
$L__tmp20: |
|
.loc 1 60 28 |
|
bar.sync 0; |
|
st.shared.u64 [global_smem], %rd194; |
|
bar.sync 0; |
|
ld.shared.u64 %rd143, [global_smem]; |
|
.loc 1 61 25 |
|
shl.b64 %rd195, %rd1, 3; |
|
add.s64 %rd144, %rd39, %rd195; |
|
.loc 1 61 36 |
|
setp.eq.s32 %p144, %r2, 0; |
|
and.pred %p137, %p144, %p85; |
|
@%p137 st.global.b64 [ %rd144 + 0 ], { %rd143 }; |
|
$L__tmp21: |
|
.loc 2 243 36 |
|
bar.sync 0; |
|
$L__tmp22: |
|
.loc 2 233 15 |
|
add.f32 %f361, %f385, %f386; |
|
add.f32 %f362, %f387, %f361; |
|
add.f32 %f363, %f388, %f362; |
|
add.f32 %f364, %f389, %f363; |
|
add.f32 %f365, %f390, %f364; |
|
add.f32 %f366, %f391, %f365; |
|
add.f32 %f367, %f392, %f366; |
|
$L__tmp23: |
|
.loc 2 243 36 |
|
mov.b32 %r229, %f367; |
|
shfl.sync.bfly.b32 %r230, %r229, 16, 31, -1; |
|
mov.b32 %f368, %r230; |
|
$L__tmp24: |
|
.loc 2 233 15 |
|
add.f32 %f369, %f367, %f368; |
|
$L__tmp25: |
|
.loc 2 243 36 |
|
mov.b32 %r231, %f369; |
|
shfl.sync.bfly.b32 %r232, %r231, 8, 31, -1; |
|
mov.b32 %f370, %r232; |
|
$L__tmp26: |
|
.loc 2 233 15 |
|
add.f32 %f371, %f369, %f370; |
|
$L__tmp27: |
|
.loc 2 243 36 |
|
mov.b32 %r233, %f371; |
|
shfl.sync.bfly.b32 %r234, %r233, 4, 31, -1; |
|
mov.b32 %f372, %r234; |
|
$L__tmp28: |
|
.loc 2 233 15 |
|
add.f32 %f373, %f371, %f372; |
|
$L__tmp29: |
|
.loc 2 243 36 |
|
mov.b32 %r235, %f373; |
|
shfl.sync.bfly.b32 %r236, %r235, 2, 31, -1; |
|
mov.b32 %f374, %r236; |
|
$L__tmp30: |
|
.loc 2 233 15 |
|
add.f32 %f375, %f373, %f374; |
|
$L__tmp31: |
|
.loc 2 243 36 |
|
mov.b32 %r237, %f375; |
|
shfl.sync.bfly.b32 %r238, %r237, 1, 31, -1; |
|
mov.b32 %f376, %r238; |
|
$L__tmp32: |
|
.loc 2 233 15 |
|
add.f32 %f377, %f375, %f376; |
|
$L__tmp33: |
|
.loc 2 243 36 |
|
shl.b32 %r239, %r191, 2; |
|
add.s32 %r183, %r44, %r239; |
|
mov.b32 %r184, %f377; |
|
@%p134 st.shared.b32 [ %r183 + 0 ], %r184; |
|
bar.sync 0; |
|
shl.b32 %r240, %r1, 2; |
|
add.s32 %r186, %r44, %r240; |
|
@%p135 ld.shared.b32 %r185, [ %r186 + 0 ]; |
|
mov.b32 %f378, %r185; |
|
shfl.sync.bfly.b32 %r241, %r185, 4, 31, -1; |
|
mov.b32 %f379, %r241; |
|
$L__tmp34: |
|
.loc 2 233 15 |
|
add.f32 %f380, %f378, %f379; |
|
$L__tmp35: |
|
.loc 2 243 36 |
|
mov.b32 %r242, %f380; |
|
shfl.sync.bfly.b32 %r243, %r242, 2, 31, -1; |
|
mov.b32 %f381, %r243; |
|
$L__tmp36: |
|
.loc 2 233 15 |
|
add.f32 %f382, %f380, %f381; |
|
$L__tmp37: |
|
.loc 2 243 36 |
|
mov.b32 %r244, %f382; |
|
shfl.sync.bfly.b32 %r245, %r244, 1, 31, -1; |
|
mov.b32 %f383, %r245; |
|
$L__tmp38: |
|
.loc 2 233 15 |
|
add.f32 %f384, %f382, %f383; |
|
$L__tmp39: |
|
.loc 2 243 36 |
|
mov.b32 %r188, %f384; |
|
@%p136 st.shared.b32 [ %r186 + 0 ], %r188; |
|
bar.sync 0; |
|
ld.shared.u32 %r189, [global_smem]; |
|
$L__tmp40: |
|
.loc 1 63 25 |
|
shl.b64 %rd196, %rd1, 2; |
|
add.s64 %rd145, %rd40, %rd196; |
|
.loc 1 63 37 |
|
@%p137 st.global.b32 [ %rd145 + 0 ], { %r189 }; |
|
.loc 1 63 4 |
|
ret; |
|
$L__tmp41: |
|
$L__func_end0: |
|
|
|
} |
|
// .globl __nv_logf |
|
// __nv_logf: single-precision logarithm helper. The name suggests natural log,
// and the last arithmetic step scales the extracted exponent by 0f3F317218,
// the float nearest ln 2.
//   in : __nv_logf_param_0  (.b32, loaded as f32)
//   out: func_retval0       (.b32, stored as f32)
// Outline: rescale very small inputs by 2^23, split the value into an exponent
// and a mantissa near 1, evaluate a polynomial in (mantissa - 1), add
// exponent * ln 2, then overwrite the result for the special cases.
.visible .func (.param .b32 func_retval0) __nv_logf( |
|
.param .b32 __nv_logf_param_0 |
|
) |
|
{ |
|
.reg .pred %p<4>; |
|
.reg .b32 %r<5>; |
|
.reg .f32 %f<36>; |
|
$L__func_begin1: |
|
|
|
ld.param.f32 %f5, [__nv_logf_param_0]; |
|
// %p1 = x < 0f00800000 (2^-126, the smallest normal float). Note that this
// compare and the multiply below carry no .ftz modifier.
setp.lt.f32 %p1, %f5, 0f00800000; |
|
// 0f4B000000 is 2^23: candidate rescaled value for inputs below 2^-126.
mul.f32 %f6, %f5, 0f4B000000; |
|
// %f1 = working value: x * 2^23 when %p1 holds, otherwise x unchanged.
selp.f32 %f1, %f6, %f5, %p1; |
|
// %f7 = exponent correction: 0fC1B80000 (-23.0) when rescaled, else 0.0.
selp.f32 %f7, 0fC1B80000, 0f00000000, %p1; |
|
// Work on the raw bit pattern of %f1 from here on.
mov.b32 %r1, %f1; |
|
// -1059760811 == -0x3F2AAAAB; 0x3F2AAAAB is the bit pattern of a float ~ 2/3.
add.s32 %r2, %r1, -1059760811; |
|
// -8388608 == 0xFF800000: clear the low 23 bits, keeping only the upper field.
and.b32 %r3, %r2, -8388608; |
|
// %r4 = bits - %r3 = 0x3F2AAAAB + (low 23 bits of %r2), so the reduced
// mantissa's bit pattern lies in [0x3F2AAAAB, 0x3FAAAAAA] (about 2/3 .. 4/3).
sub.s32 %r4, %r1, %r3; |
|
mov.b32 %f8, %r4; |
|
// %r3 is a multiple of 2^23; converting it and scaling by 0f34000000 (2^-23)
// yields the integer exponent as a float, to which the correction %f7 is added.
cvt.rn.f32.s32 %f9, %r3; |
|
mov.f32 %f10, 0f34000000; |
|
fma.rn.ftz.f32 %f11, %f9, %f10, %f7; |
|
// %f12 = t = mantissa - 1.0 (0fBF800000 is -1.0).
add.f32 %f12, %f8, 0fBF800000; |
|
// Horner evaluation of a degree-8 polynomial in t. Coefficients are
// materialised one at a time: %f14 is the leading one, %f28 (-0.5) the last.
mov.f32 %f13, 0f3E1039F6; |
|
mov.f32 %f14, 0fBE055027; |
|
fma.rn.ftz.f32 %f15, %f14, %f12, %f13; |
|
mov.f32 %f16, 0fBDF8CDCC; |
|
fma.rn.ftz.f32 %f17, %f15, %f12, %f16; |
|
mov.f32 %f18, 0f3E0F2955; |
|
fma.rn.ftz.f32 %f19, %f17, %f12, %f18; |
|
mov.f32 %f20, 0fBE2AD8B9; |
|
fma.rn.ftz.f32 %f21, %f19, %f12, %f20; |
|
mov.f32 %f22, 0f3E4CED0B; |
|
fma.rn.ftz.f32 %f23, %f21, %f12, %f22; |
|
mov.f32 %f24, 0fBE7FFF22; |
|
fma.rn.ftz.f32 %f25, %f23, %f12, %f24; |
|
mov.f32 %f26, 0f3EAAAA78; |
|
fma.rn.ftz.f32 %f27, %f25, %f12, %f26; |
|
mov.f32 %f28, 0fBF000000; |
|
fma.rn.ftz.f32 %f29, %f27, %f12, %f28; |
|
// %f31 = t + t^2 * poly(t), formed as (t * poly) * t + t.
mul.f32 %f30, %f12, %f29; |
|
fma.rn.ftz.f32 %f31, %f30, %f12, %f12; |
|
// %f35 = exponent * 0f3F317218 (~ ln 2) + mantissa term: the regular result.
mov.f32 %f32, 0f3F317218; |
|
fma.rn.ftz.f32 %f35, %f11, %f32, %f31; |
|
// 2139095040 == 0x7F800000, the bit pattern of +inf. The compare is unsigned,
// so any pattern with the sign bit set also fails it and falls through.
setp.lt.u32 %p2, %r1, 2139095040; |
|
@%p2 bra $L__BB1_2; |
|
// Fall-through path (+inf, NaN, or sign bit set): replace the result with
// fma(v, +inf, +inf), where v is the working value %f1.
mov.f32 %f33, 0f7F800000; |
|
fma.rn.ftz.f32 %f35, %f1, %f33, %f33; |
|
$L__BB1_2: |
|
// A working value equal to zero overrides everything with 0fFF800000 (-inf).
setp.eq.f32 %p3, %f1, 0f00000000; |
|
selp.f32 %f34, 0fFF800000, %f35, %p3; |
|
st.param.f32 [func_retval0+0], %f34; |
|
ret; |
|
$L__func_end1: |
|
|
|
} |
|
.file 1 "/tmp/torchinductor_root/ul/culwqy52mqs4o2bmqocf2r5plomw2phviv5gutbxlcpdrdkc46ri.py" |
|
.file 2 "/usr/local/lib/python3.10/dist-packages/triton/language/standard.py" |
|
// DWARF abbreviation table referenced by .debug_info. Each declaration is
// <code> <tag> <has-children> followed by (attribute, form) pairs and a 0,0
// terminator. The names in the comments below were decoded by hand from the
// DWARF encoding tables; the bytes themselves are unchanged.
.section .debug_abbrev |
|
{ |
|
// Abbrev 1: tag 17 (DW_TAG_compile_unit), has children.
//   37/8 producer:string, 19/5 language:data2, 3/8 name:string,
//   16/6 stmt_list:data4, 27/8 comp_dir:string,
//   180,66 (ULEB128 0x2134, DW_AT_GNU_pubnames) / 12 flag,
//   17/1 low_pc:addr, 18/1 high_pc:addr.
.b8 1 |
|
.b8 17 |
|
.b8 1 |
|
.b8 37 |
|
.b8 8 |
|
.b8 19 |
|
.b8 5 |
|
.b8 3 |
|
.b8 8 |
|
.b8 16 |
|
.b8 6 |
|
.b8 27 |
|
.b8 8 |
|
.b8 180 |
|
.b8 66 |
|
.b8 12 |
|
.b8 17 |
|
.b8 1 |
|
.b8 18 |
|
.b8 1 |
|
.b8 0 |
|
.b8 0 |
|
// Abbrev 2: tag 46 (DW_TAG_subprogram), no children.
//   135,64 (ULEB128 0x2007, DW_AT_MIPS_linkage_name) / 8 string,
//   3/8 name:string, 58/11 decl_file:data1, 59/11 decl_line:data1,
//   63/12 external:flag, 32/11 inline:data1.
.b8 2 |
|
.b8 46 |
|
.b8 0 |
|
.b8 135 |
|
.b8 64 |
|
.b8 8 |
|
.b8 3 |
|
.b8 8 |
|
.b8 58 |
|
.b8 11 |
|
.b8 59 |
|
.b8 11 |
|
.b8 63 |
|
.b8 12 |
|
.b8 32 |
|
.b8 11 |
|
.b8 0 |
|
.b8 0 |
|
// Abbrev 3: tag 46 (DW_TAG_subprogram), has children.
//   17/1 low_pc:addr, 18/1 high_pc:addr, 64/10 frame_base:block1,
//   49/19 abstract_origin:ref4.
.b8 3 |
|
.b8 46 |
|
.b8 1 |
|
.b8 17 |
|
.b8 1 |
|
.b8 18 |
|
.b8 1 |
|
.b8 64 |
|
.b8 10 |
|
.b8 49 |
|
.b8 19 |
|
.b8 0 |
|
.b8 0 |
|
// Abbrev 4: tag 29 (DW_TAG_inlined_subroutine), no children.
//   49/19 abstract_origin:ref4, 17/1 low_pc:addr, 18/1 high_pc:addr,
//   88/11 call_file:data1, 89/11 call_line:data1, 87/11 call_column:data1.
.b8 4 |
|
.b8 29 |
|
.b8 0 |
|
.b8 49 |
|
.b8 19 |
|
.b8 17 |
|
.b8 1 |
|
.b8 18 |
|
.b8 1 |
|
.b8 88 |
|
.b8 11 |
|
.b8 89 |
|
.b8 11 |
|
.b8 87 |
|
.b8 11 |
|
.b8 0 |
|
.b8 0 |
|
// Abbrev 5: same attribute list as abbrev 4, but flagged as having children.
.b8 5 |
|
.b8 29 |
|
.b8 1 |
|
.b8 49 |
|
.b8 19 |
|
.b8 17 |
|
.b8 1 |
|
.b8 18 |
|
.b8 1 |
|
.b8 88 |
|
.b8 11 |
|
.b8 89 |
|
.b8 11 |
|
.b8 87 |
|
.b8 11 |
|
.b8 0 |
|
.b8 0 |
|
// End of the abbreviation table.
.b8 0 |
|
} |
|
// DWARF .debug_info for the single compile unit. Field meanings follow the
// abbreviation codes declared in .debug_abbrev; names in the comments were
// decoded by hand from the DWARF encoding tables. Byte offsets quoted below
// are relative to the start of this section.
.section .debug_info |
|
{ |
|
// Unit header: length 349 (bytes following this word), version 2 (two bytes,
// little-endian), offset into .debug_abbrev, address size 8.
.b32 349 |
|
.b8 2 |
|
.b8 0 |
|
.b32 .debug_abbrev |
|
.b8 8 |
|
// DIE, abbrev 1 (compile unit). Producer string: "triton".
.b8 1 |
|
.b8 116 |
|
.b8 114 |
|
.b8 105 |
|
.b8 116 |
|
.b8 111 |
|
.b8 110 |
|
.b8 0 |
|
// Language, 2 bytes little-endian: value 2.
.b8 2 |
|
.b8 0 |
|
// Name string: "culwqy52mqs4o2bmqocf2r5plomw2phviv5gutbxlcpdrdkc46ri.py".
.b8 99 |
|
.b8 117 |
|
.b8 108 |
|
.b8 119 |
|
.b8 113 |
|
.b8 121 |
|
.b8 53 |
|
.b8 50 |
|
.b8 109 |
|
.b8 113 |
|
.b8 115 |
|
.b8 52 |
|
.b8 111 |
|
.b8 50 |
|
.b8 98 |
|
.b8 109 |
|
.b8 113 |
|
.b8 111 |
|
.b8 99 |
|
.b8 102 |
|
.b8 50 |
|
.b8 114 |
|
.b8 53 |
|
.b8 112 |
|
.b8 108 |
|
.b8 111 |
|
.b8 109 |
|
.b8 119 |
|
.b8 50 |
|
.b8 112 |
|
.b8 104 |
|
.b8 118 |
|
.b8 105 |
|
.b8 118 |
|
.b8 53 |
|
.b8 103 |
|
.b8 117 |
|
.b8 116 |
|
.b8 98 |
|
.b8 120 |
|
.b8 108 |
|
.b8 99 |
|
.b8 112 |
|
.b8 100 |
|
.b8 114 |
|
.b8 100 |
|
.b8 107 |
|
.b8 99 |
|
.b8 52 |
|
.b8 54 |
|
.b8 114 |
|
.b8 105 |
|
.b8 46 |
|
.b8 112 |
|
.b8 121 |
|
.b8 0 |
|
// Statement list: offset into .debug_line.
.b32 .debug_line |
|
// Compilation directory string: "/tmp/torchinductor_root/ul".
.b8 47 |
|
.b8 116 |
|
.b8 109 |
|
.b8 112 |
|
.b8 47 |
|
.b8 116 |
|
.b8 111 |
|
.b8 114 |
|
.b8 99 |
|
.b8 104 |
|
.b8 105 |
|
.b8 110 |
|
.b8 100 |
|
.b8 117 |
|
.b8 99 |
|
.b8 116 |
|
.b8 111 |
|
.b8 114 |
|
.b8 95 |
|
.b8 114 |
|
.b8 111 |
|
.b8 111 |
|
.b8 116 |
|
.b8 47 |
|
.b8 117 |
|
.b8 108 |
|
.b8 0 |
|
// Pubnames flag = 1, then the unit's low/high PC (bounds of function 0).
.b8 1 |
|
.b64 $L__func_begin0 |
|
.b64 $L__func_end0 |
|
// DIE at offset 125, abbrev 2 (subprogram, abstract). Linkage name string:
// "triton__0d1d2d3d4d5d6e7de".
.b8 2 |
|
.b8 116 |
|
.b8 114 |
|
.b8 105 |
|
.b8 116 |
|
.b8 111 |
|
.b8 110 |
|
.b8 95 |
|
.b8 95 |
|
.b8 48 |
|
.b8 100 |
|
.b8 49 |
|
.b8 100 |
|
.b8 50 |
|
.b8 100 |
|
.b8 51 |
|
.b8 100 |
|
.b8 52 |
|
.b8 100 |
|
.b8 53 |
|
.b8 100 |
|
.b8 54 |
|
.b8 101 |
|
.b8 55 |
|
.b8 100 |
|
.b8 101 |
|
.b8 0 |
|
// Name string: the same text, "triton__0d1d2d3d4d5d6e7de".
.b8 116 |
|
.b8 114 |
|
.b8 105 |
|
.b8 116 |
|
.b8 111 |
|
.b8 110 |
|
.b8 95 |
|
.b8 95 |
|
.b8 48 |
|
.b8 100 |
|
.b8 49 |
|
.b8 100 |
|
.b8 50 |
|
.b8 100 |
|
.b8 51 |
|
.b8 100 |
|
.b8 52 |
|
.b8 100 |
|
.b8 53 |
|
.b8 100 |
|
.b8 54 |
|
.b8 101 |
|
.b8 55 |
|
.b8 100 |
|
.b8 101 |
|
.b8 0 |
|
// decl_file = 1, decl_line = 18, external = 1, inline = 1.
.b8 1 |
|
.b8 18 |
|
.b8 1 |
|
.b8 1 |
|
// DIE, abbrev 3 (subprogram, concrete, has children): low/high PC, a 1-byte
// frame-base block containing opcode 156 (0x9c), abstract origin = offset 125.
.b8 3 |
|
.b64 $L__func_begin0 |
|
.b64 $L__func_end0 |
|
.b8 1 |
|
.b8 156 |
|
.b32 125 |
|
// Child, abbrev 4 (inlined subroutine): origin 125, range $L__tmp1..$L__tmp20,
// call file 2, call line 60, call column 25.
.b8 4 |
|
.b32 125 |
|
.b64 $L__tmp1 |
|
.b64 $L__tmp20 |
|
.b8 2 |
|
.b8 60 |
|
.b8 25 |
|
// Child, abbrev 5 (inlined subroutine with children): origin 125,
// range $L__tmp2..$L__tmp19, call file 2, call line 60, call column 25.
.b8 5 |
|
.b32 125 |
|
.b64 $L__tmp2 |
|
.b64 $L__tmp19 |
|
.b8 2 |
|
.b8 60 |
|
.b8 25 |
|
// Nested child, abbrev 4: origin 125, range $L__tmp2..$L__tmp19,
// call file 2, call line 243, call column 36.
.b8 4 |
|
.b32 125 |
|
.b64 $L__tmp2 |
|
.b64 $L__tmp19 |
|
.b8 2 |
|
.b8 243 |
|
.b8 36 |
|
// End of the children of the preceding abbrev-5 DIE.
.b8 0 |
|
// Child, abbrev 4: origin 125, range $L__tmp21..$L__tmp40,
// call file 2, call line 62, call column 27.
.b8 4 |
|
.b32 125 |
|
.b64 $L__tmp21 |
|
.b64 $L__tmp40 |
|
.b8 2 |
|
.b8 62 |
|
.b8 27 |
|
// Child, abbrev 5: origin 125, range $L__tmp22..$L__tmp39,
// call file 2, call line 62, call column 27.
.b8 5 |
|
.b32 125 |
|
.b64 $L__tmp22 |
|
.b64 $L__tmp39 |
|
.b8 2 |
|
.b8 62 |
|
.b8 27 |
|
// Nested child, abbrev 4: origin 125, range $L__tmp22..$L__tmp39,
// call file 2, call line 243, call column 36.
.b8 4 |
|
.b32 125 |
|
.b64 $L__tmp22 |
|
.b64 $L__tmp39 |
|
.b8 2 |
|
.b8 243 |
|
.b8 36 |
|
// Three terminators: end of the abbrev-5 children, end of the abbrev-3
// children, end of the compile unit's children.
.b8 0 |
|
.b8 0 |
|
.b8 0 |
|
} |
|
// DWARF .debug_pubnames: a name lookup table with a single entry that points
// at the DIE at offset 125 of .debug_info.
.section .debug_pubnames |
|
{ |
|
// Length of the table that follows, computed from the two labels below.
.b32 $L__pubNames_end0-$L__pubNames_start0 |
|
$L__pubNames_start0: |
|
// Version 2 (two bytes, little-endian), offset of the unit in .debug_info,
// and 353, which equals that unit's total size (4-byte length word + 349).
.b8 2 |
|
.b8 0 |
|
.b32 .debug_info |
|
.b32 353 |
|
// Entry: DIE offset 125, followed by the string "triton__0d1d2d3d4d5d6e7de".
.b32 125 |
|
.b8 116 |
|
.b8 114 |
|
.b8 105 |
|
.b8 116 |
|
.b8 111 |
|
.b8 110 |
|
.b8 95 |
|
.b8 95 |
|
.b8 48 |
|
.b8 100 |
|
.b8 49 |
|
.b8 100 |
|
.b8 50 |
|
.b8 100 |
|
.b8 51 |
|
.b8 100 |
|
.b8 52 |
|
.b8 100 |
|
.b8 53 |
|
.b8 100 |
|
.b8 54 |
|
.b8 101 |
|
.b8 55 |
|
.b8 100 |
|
.b8 101 |
|
.b8 0 |
|
// Zero offset terminates the entry list.
.b32 0 |
|
$L__pubNames_end0: |
|
} |
|
// DWARF .debug_pubtypes: same header layout as .debug_pubnames, but the entry
// list is empty (the header is followed directly by the zero terminator).
.section .debug_pubtypes |
|
{ |
|
// Length of the table that follows, computed from the two labels below.
.b32 $L__pubTypes_end0-$L__pubTypes_start0 |
|
$L__pubTypes_start0: |
|
// Version 2 (two bytes, little-endian), offset of the unit in .debug_info,
// and that unit's total size (353).
.b8 2 |
|
.b8 0 |
|
.b32 .debug_info |
|
.b32 353 |
|
// Zero offset: no entries.
.b32 0 |
|
$L__pubTypes_end0: |
|
} |
|
.section .debug_loc { } |
|
|