diff --git "a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil" @@ -0,0 +1,1025 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_24_axis_0 = const()[name = tensor("op_24_axis_0"), val = tensor(0)]; + tensor var_24_batch_dims_0 = const()[name = tensor("op_24_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_24_cast_fp16 = gather(axis = var_24_axis_0, batch_dims = var_24_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_24_cast_fp16")]; + tensor var_31_axis_0 = const()[name = tensor("op_31_axis_0"), val = tensor(0)]; + tensor var_31_batch_dims_0 = const()[name = tensor("op_31_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = tensor("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132777088)))]; + tensor var_31_cast_fp16 = gather(axis = var_31_axis_0, batch_dims = var_31_batch_dims_0, indices = cache_length, x = embed_positions_inlier_module_weight_to_fp16)[name = tensor("op_31_cast_fp16")]; + tensor var_33_axis_0 = const()[name = tensor("op_33_axis_0"), val = tensor(0)]; + tensor var_33_batch_dims_0 = const()[name = tensor("op_33_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133941312))), name = tensor("embed_positions_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133924032))), shape = tensor([448, 1280])]; + tensor var_33_cast_fp16 = gather(axis = var_33_axis_0, batch_dims = var_33_batch_dims_0, indices = cache_length, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = tensor("op_33_cast_fp16")]; + tensor var_34_cast_fp16 = add(x = var_31_cast_fp16, y = var_33_cast_fp16)[name = tensor("op_34_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_24_cast_fp16, y = var_34_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_48_axes_0 = const()[name = tensor("op_48_axes_0"), val = tensor([2])]; + tensor var_48_cast_fp16 = expand_dims(axes = var_48_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_48_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_48_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([1280, 1280, 1280, 1280])]; + tensor var_53_axis_0 = const()[name = tensor("op_53_axis_0"), val = tensor(1)]; + tensor var_53_cast_fp16_0, tensor var_53_cast_fp16_1, tensor var_53_cast_fp16_2, tensor var_53_cast_fp16_3 = split(axis = var_53_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_53_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([1280, 1280, 1280, 1280])]; + tensor var_60_axis_0 = const()[name = tensor("op_60_axis_0"), val = tensor(1)]; + tensor var_60_cast_fp16_0, tensor var_60_cast_fp16_1, tensor var_60_cast_fp16_2, tensor var_60_cast_fp16_3 = split(axis = var_60_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_60_cast_fp16")]; + tensor var_70 = const()[name = tensor("op_70"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_96_to_fp16 = const()[name = tensor("op_96_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_96_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134013056)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134015680)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134018304)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134020928)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_118_pad_type_0 = const()[name = tensor("op_118_pad_type_0"), val = tensor("valid")]; + tensor var_118_strides_0 = const()[name = tensor("op_118_strides_0"), val = tensor([1, 1])]; + tensor var_118_pad_0 = const()[name = tensor("op_118_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_118_dilations_0 = const()[name = tensor("op_118_dilations_0"), val = tensor([1, 1])]; + tensor var_118_groups_0 = const()[name = tensor("op_118_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134023552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134842816))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134842944)))]; + tensor var_118_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_118_dilations_0, groups = var_118_groups_0, pad = var_118_pad_0, pad_type = var_118_pad_type_0, strides = var_118_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_118_cast_fp16")]; + tensor var_124_pad_type_0 = const()[name = tensor("op_124_pad_type_0"), val = tensor("valid")]; + tensor var_124_strides_0 = const()[name = tensor("op_124_strides_0"), val = tensor([1, 1])]; + tensor var_124_pad_0 = const()[name = tensor("op_124_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_124_dilations_0 = const()[name = tensor("op_124_dilations_0"), val = tensor([1, 1])]; + tensor var_124_groups_0 = const()[name = tensor("op_124_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134918592))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134845568))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_124_cast_fp16 = conv(dilations = var_124_dilations_0, groups = var_124_groups_0, pad = var_124_pad_0, pad_type = var_124_pad_type_0, strides = var_124_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_124_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_118_cast_fp16, y = var_124_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_133_pad_type_0 = const()[name = tensor("op_133_pad_type_0"), val = tensor("valid")]; + tensor var_133_strides_0 = const()[name = tensor("op_133_strides_0"), val = tensor([1, 1])]; + tensor var_133_pad_0 = const()[name = tensor("op_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_133_dilations_0 = const()[name = tensor("op_133_dilations_0"), val = tensor([1, 1])]; + tensor var_133_groups_0 = const()[name = tensor("op_133_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135123456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135942720))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_133_cast_fp16 = conv(dilations = var_133_dilations_0, groups = var_133_groups_0, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_133_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_133_cast_fp16")]; + tensor var_139_pad_type_0 = const()[name = tensor("op_139_pad_type_0"), val = tensor("valid")]; + tensor var_139_strides_0 = const()[name = tensor("op_139_strides_0"), val = tensor([1, 1])]; + tensor var_139_pad_0 = const()[name = tensor("op_139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_139_dilations_0 = const()[name = tensor("op_139_dilations_0"), val = tensor([1, 1])]; + tensor var_139_groups_0 = const()[name = tensor("op_139_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135976320))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135942848))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_139_cast_fp16 = conv(dilations = var_139_dilations_0, groups = var_139_groups_0, pad = var_139_pad_0, pad_type = var_139_pad_type_0, strides = var_139_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_139_cast_fp16")]; + tensor current_key_1_cast_fp16 = add(x = var_133_cast_fp16, y = var_139_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor var_149_pad_type_0 = const()[name = tensor("op_149_pad_type_0"), val = tensor("valid")]; + tensor var_149_strides_0 = const()[name = tensor("op_149_strides_0"), val = tensor([1, 1])]; + tensor var_149_pad_0 = const()[name = tensor("op_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_149_dilations_0 = const()[name = tensor("op_149_dilations_0"), val = tensor([1, 1])]; + tensor var_149_groups_0 = const()[name = tensor("op_149_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136181184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137000448))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137000576)))]; + tensor var_149_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_149_dilations_0, groups = var_149_groups_0, pad = var_149_pad_0, pad_type = var_149_pad_type_0, strides = var_149_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_149_cast_fp16")]; + tensor var_155_pad_type_0 = const()[name = tensor("op_155_pad_type_0"), val = tensor("valid")]; + tensor var_155_strides_0 = const()[name = tensor("op_155_strides_0"), val = tensor([1, 1])]; + tensor var_155_pad_0 = const()[name = tensor("op_155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_dilations_0 = const()[name = tensor("op_155_dilations_0"), val = tensor([1, 1])]; + tensor var_155_groups_0 = const()[name = tensor("op_155_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137046720))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137003200))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_155_cast_fp16 = conv(dilations = var_155_dilations_0, groups = var_155_groups_0, pad = var_155_pad_0, pad_type = var_155_pad_type_0, strides = var_155_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor current_value_1_cast_fp16 = add(x = var_149_cast_fp16, y = var_155_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_158_axes_0 = const()[name = tensor("op_158_axes_0"), val = tensor([1])]; + tensor var_158_cast_fp16 = expand_dims(axes = var_158_axes_0, x = kv_cache_update_mask)[name = tensor("op_158_cast_fp16")]; + tensor var_159_axes_0 = const()[name = tensor("op_159_axes_0"), val = tensor([2])]; + tensor var_159_cast_fp16 = expand_dims(axes = var_159_axes_0, x = var_158_cast_fp16)[name = tensor("op_159_cast_fp16")]; + tensor var_161_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_71_to_fp16 = const()[name = tensor("op_71_to_fp16"), val = tensor(0x1p+0)]; + tensor var_162_cast_fp16 = sub(x = var_71_to_fp16, y = var_159_cast_fp16)[name = tensor("op_162_cast_fp16")]; + tensor var_163_cast_fp16 = mul(x = var_53_cast_fp16_0, y = var_162_cast_fp16)[name = tensor("op_163_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_161_cast_fp16, y = var_163_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_165_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_165_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = var_60_cast_fp16_0, y = var_162_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_165_cast_fp16, y = var_167_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_170, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_172_to_fp16 = const()[name = tensor("op_172_to_fp16"), val = tensor(0x1p-3)]; + tensor var_173_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_172_to_fp16)[name = tensor("op_173_cast_fp16")]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 20, 64, -1])]; + tensor var_175_cast_fp16 = reshape(shape = var_174, x = key_1_cast_fp16)[name = tensor("op_175_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_173_cast_fp16, y = var_175_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_179_axes_0 = const()[name = tensor("op_179_axes_0"), val = tensor([1])]; + tensor var_179_cast_fp16 = expand_dims(axes = var_179_axes_0, x = decoder_key_padding_mask)[name = tensor("op_179_cast_fp16")]; + tensor var_180_axes_0 = const()[name = tensor("op_180_axes_0"), val = tensor([2])]; + tensor var_180_cast_fp16 = expand_dims(axes = var_180_axes_0, x = var_179_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_180_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_183_cast_fp16 = softmax(axis = var_70, x = mh_w_3_cast_fp16)[name = tensor("op_183_cast_fp16")]; + tensor var_184 = const()[name = tensor("op_184"), val = tensor([1, 20, 64, -1])]; + tensor var_185_cast_fp16 = reshape(shape = var_184, x = value_1_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_185_cast_fp16, y = var_183_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_188 = const()[name = tensor("op_188"), val = tensor([1, 1280, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_188, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_198_pad_type_0 = const()[name = tensor("op_198_pad_type_0"), val = tensor("valid")]; + tensor var_198_strides_0 = const()[name = tensor("op_198_strides_0"), val = tensor([1, 1])]; + tensor var_198_pad_0 = const()[name = tensor("op_198_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_198_dilations_0 = const()[name = tensor("op_198_dilations_0"), val = tensor([1, 1])]; + tensor var_198_groups_0 = const()[name = tensor("op_198_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137251584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138070848))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138070976)))]; + tensor var_198_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_198_dilations_0, groups = var_198_groups_0, pad = var_198_pad_0, pad_type = var_198_pad_type_0, strides = var_198_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_198_cast_fp16")]; + tensor var_204_pad_type_0 = const()[name = tensor("op_204_pad_type_0"), val = tensor("valid")]; + tensor var_204_strides_0 = const()[name = tensor("op_204_strides_0"), val = tensor([1, 1])]; + tensor var_204_pad_0 = const()[name = tensor("op_204_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_204_dilations_0 = const()[name = tensor("op_204_dilations_0"), val = tensor([1, 1])]; + tensor var_204_groups_0 = const()[name = tensor("op_204_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138130624))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138073600))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_204_cast_fp16 = conv(dilations = var_204_dilations_0, groups = var_204_groups_0, pad = var_204_pad_0, pad_type = var_204_pad_type_0, strides = var_204_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_204_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_198_cast_fp16, y = var_204_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_219_to_fp16 = const()[name = tensor("op_219_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_219_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138335488)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138338112)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_241_pad_type_0 = const()[name = tensor("op_241_pad_type_0"), val = tensor("valid")]; + tensor var_241_strides_0 = const()[name = tensor("op_241_strides_0"), val = tensor([1, 1])]; + tensor var_241_pad_0 = const()[name = tensor("op_241_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_241_dilations_0 = const()[name = tensor("op_241_dilations_0"), val = tensor([1, 1])]; + tensor var_241_groups_0 = const()[name = tensor("op_241_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138340736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139160000))), name = tensor("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139160128)))]; + tensor var_241_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_241_dilations_0, groups = var_241_groups_0, pad = var_241_pad_0, pad_type = var_241_pad_type_0, strides = var_241_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_241_cast_fp16")]; + tensor var_247_pad_type_0 = const()[name = tensor("op_247_pad_type_0"), val = tensor("valid")]; + tensor var_247_strides_0 = const()[name = tensor("op_247_strides_0"), val = tensor([1, 1])]; + tensor var_247_pad_0 = const()[name = tensor("op_247_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_247_dilations_0 = const()[name = tensor("op_247_dilations_0"), val = tensor([1, 1])]; + tensor var_247_groups_0 = const()[name = tensor("op_247_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139188224))), name = tensor("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139162752))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_247_cast_fp16 = conv(dilations = var_247_dilations_0, groups = var_247_groups_0, pad = var_247_pad_0, pad_type = var_247_pad_type_0, strides = var_247_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_241_cast_fp16, y = var_247_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_256_pad_type_0 = const()[name = tensor("op_256_pad_type_0"), val = tensor("valid")]; + tensor var_256_strides_0 = const()[name = tensor("op_256_strides_0"), val = tensor([1, 1])]; + tensor var_256_pad_0 = const()[name = tensor("op_256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_256_dilations_0 = const()[name = tensor("op_256_dilations_0"), val = tensor([1, 1])]; + tensor var_256_groups_0 = const()[name = tensor("op_256_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139393088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140212352))), name = tensor("layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_256_cast_fp16 = conv(dilations = var_256_dilations_0, groups = var_256_groups_0, pad = var_256_pad_0, pad_type = var_256_pad_type_0, strides = var_256_strides_0, weight = layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_256_cast_fp16")]; + tensor var_262_pad_type_0 = const()[name = tensor("op_262_pad_type_0"), val = tensor("valid")]; + tensor var_262_strides_0 = const()[name = tensor("op_262_strides_0"), val = tensor([1, 1])]; + tensor var_262_pad_0 = const()[name = tensor("op_262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_262_dilations_0 = const()[name = tensor("op_262_dilations_0"), val = tensor([1, 1])]; + tensor var_262_groups_0 = const()[name = tensor("op_262_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140272448))), name = tensor("layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140212480))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_262_cast_fp16 = conv(dilations = var_262_dilations_0, groups = var_262_groups_0, pad = var_262_pad_0, pad_type = var_262_pad_type_0, strides = var_262_strides_0, weight = layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_262_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_256_cast_fp16, y = var_262_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_272_pad_type_0 = const()[name = tensor("op_272_pad_type_0"), val = tensor("valid")]; + tensor var_272_strides_0 = const()[name = tensor("op_272_strides_0"), val = tensor([1, 1])]; + tensor var_272_pad_0 = const()[name = tensor("op_272_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_272_dilations_0 = const()[name = tensor("op_272_dilations_0"), val = tensor([1, 1])]; + tensor var_272_groups_0 = const()[name = tensor("op_272_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140477312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141296576))), name = tensor("layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141296704)))]; + tensor var_272_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_272_dilations_0, groups = var_272_groups_0, pad = var_272_pad_0, pad_type = var_272_pad_type_0, strides = var_272_strides_0, weight = layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_272_cast_fp16")]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("valid")]; + tensor var_278_strides_0 = const()[name = tensor("op_278_strides_0"), val = tensor([1, 1])]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_dilations_0 = const()[name = tensor("op_278_dilations_0"), val = tensor([1, 1])]; + tensor var_278_groups_0 = const()[name = tensor("op_278_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141310592))), name = tensor("layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141299328))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_278_cast_fp16 = conv(dilations = var_278_dilations_0, groups = var_278_groups_0, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_278_strides_0, weight = layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_278_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_272_cast_fp16, y = var_278_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_281 = const()[name = tensor("op_281"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_281, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_283_to_fp16 = const()[name = tensor("op_283_to_fp16"), val = tensor(0x1p-3)]; + tensor var_284_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_283_to_fp16)[name = tensor("op_284_cast_fp16")]; + tensor var_285 = const()[name = tensor("op_285"), val = tensor([1, 20, 64, -1])]; + tensor var_286_cast_fp16 = reshape(shape = var_285, x = key_3_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_284_cast_fp16, y = var_286_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_70, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 20, 64, -1])]; + tensor var_291_cast_fp16 = reshape(shape = var_290, x = value_3_cast_fp16)[name = tensor("op_291_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_291_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_294 = const()[name = tensor("op_294"), val = tensor([1, 1280, 1, -1])]; + tensor input_3_cast_fp16 = reshape(shape = var_294, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_304_pad_type_0 = const()[name = tensor("op_304_pad_type_0"), val = tensor("valid")]; + tensor var_304_strides_0 = const()[name = tensor("op_304_strides_0"), val = tensor([1, 1])]; + tensor var_304_pad_0 = const()[name = tensor("op_304_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_304_dilations_0 = const()[name = tensor("op_304_dilations_0"), val = tensor([1, 1])]; + tensor var_304_groups_0 = const()[name = tensor("op_304_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141515456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142334720))), name = tensor("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142334848)))]; + tensor var_304_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_304_dilations_0, groups = var_304_groups_0, pad = var_304_pad_0, pad_type = var_304_pad_type_0, strides = var_304_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_304_cast_fp16")]; + tensor var_310_pad_type_0 = const()[name = tensor("op_310_pad_type_0"), val = tensor("valid")]; + tensor var_310_strides_0 = const()[name = tensor("op_310_strides_0"), val = tensor([1, 1])]; + tensor var_310_pad_0 = const()[name = tensor("op_310_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_310_dilations_0 = const()[name = tensor("op_310_dilations_0"), val = tensor([1, 1])]; + tensor var_310_groups_0 = const()[name = tensor("op_310_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142349632))), name = tensor("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142337472))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_310_cast_fp16 = conv(dilations = var_310_dilations_0, groups = var_310_groups_0, pad = var_310_pad_0, pad_type = var_310_pad_type_0, strides = var_310_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_304_cast_fp16, y = var_310_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_321_to_fp16 = const()[name = tensor("op_321_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_321_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142554496)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142557120)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_339_pad_type_0 = const()[name = tensor("op_339_pad_type_0"), val = tensor("valid")]; + tensor var_339_strides_0 = const()[name = tensor("op_339_strides_0"), val = tensor([1, 1])]; + tensor var_339_pad_0 = const()[name = tensor("op_339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_339_dilations_0 = const()[name = tensor("op_339_dilations_0"), val = tensor([1, 1])]; + tensor var_339_groups_0 = const()[name = tensor("op_339_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142559744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145836608))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145836736)))]; + tensor var_339_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_339_dilations_0, groups = var_339_groups_0, pad = var_339_pad_0, pad_type = var_339_pad_type_0, strides = var_339_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = tensor("op_339_cast_fp16")]; + tensor var_345_pad_type_0 = const()[name = tensor("op_345_pad_type_0"), val = tensor("valid")]; + tensor var_345_strides_0 = const()[name = tensor("op_345_strides_0"), val = tensor([1, 1])]; + tensor var_345_pad_0 = const()[name = tensor("op_345_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_345_dilations_0 = const()[name = tensor("op_345_dilations_0"), val = tensor([1, 1])]; + tensor var_345_groups_0 = const()[name = tensor("op_345_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145948608))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145847040))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_345_cast_fp16 = conv(dilations = var_345_dilations_0, groups = var_345_groups_0, pad = var_345_pad_0, pad_type = var_345_pad_type_0, strides = var_345_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = tensor("op_345_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = var_339_cast_fp16, y = var_345_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_356_pad_type_0 = const()[name = tensor("op_356_pad_type_0"), val = tensor("valid")]; + tensor var_356_strides_0 = const()[name = tensor("op_356_strides_0"), val = tensor([1, 1])]; + tensor var_356_pad_0 = const()[name = tensor("op_356_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_356_dilations_0 = const()[name = tensor("op_356_dilations_0"), val = tensor([1, 1])]; + tensor var_356_groups_0 = const()[name = tensor("op_356_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146767872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150044736))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150044864)))]; + tensor var_356_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_362_pad_type_0 = const()[name = tensor("op_362_pad_type_0"), val = tensor("valid")]; + tensor var_362_strides_0 = const()[name = tensor("op_362_strides_0"), val = tensor([1, 1])]; + tensor var_362_pad_0 = const()[name = tensor("op_362_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_362_dilations_0 = const()[name = tensor("op_362_dilations_0"), val = tensor([1, 1])]; + tensor var_362_groups_0 = const()[name = tensor("op_362_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150230016))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150047488))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_362_cast_fp16 = conv(dilations = var_362_dilations_0, groups = var_362_groups_0, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_362_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_362_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = add(x = var_356_cast_fp16, y = var_362_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_374 = const()[name = tensor("op_374"), val = tensor(3)]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_400_to_fp16 = const()[name = tensor("op_400_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_400_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151049280)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151051904)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("valid")]; + tensor var_422_strides_0 = const()[name = tensor("op_422_strides_0"), val = tensor([1, 1])]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_dilations_0 = const()[name = tensor("op_422_dilations_0"), val = tensor([1, 1])]; + tensor var_422_groups_0 = const()[name = tensor("op_422_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151054528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151873792))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151873920)))]; + tensor var_422_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_422_dilations_0, groups = var_422_groups_0, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_422_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor var_428_pad_type_0 = const()[name = tensor("op_428_pad_type_0"), val = tensor("valid")]; + tensor var_428_strides_0 = const()[name = tensor("op_428_strides_0"), val = tensor([1, 1])]; + tensor var_428_pad_0 = const()[name = tensor("op_428_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_428_dilations_0 = const()[name = tensor("op_428_dilations_0"), val = tensor([1, 1])]; + tensor var_428_groups_0 = const()[name = tensor("op_428_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151936640))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151876544))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_428_cast_fp16 = conv(dilations = var_428_dilations_0, groups = var_428_groups_0, pad = var_428_pad_0, pad_type = var_428_pad_type_0, strides = var_428_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_422_cast_fp16, y = var_428_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_437_pad_type_0 = const()[name = tensor("op_437_pad_type_0"), val = tensor("valid")]; + tensor var_437_strides_0 = const()[name = tensor("op_437_strides_0"), val = tensor([1, 1])]; + tensor var_437_pad_0 = const()[name = tensor("op_437_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_437_dilations_0 = const()[name = tensor("op_437_dilations_0"), val = tensor([1, 1])]; + tensor var_437_groups_0 = const()[name = tensor("op_437_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152141504))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152960768))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_437_cast_fp16 = conv(dilations = var_437_dilations_0, groups = var_437_groups_0, pad = var_437_pad_0, pad_type = var_437_pad_type_0, strides = var_437_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_437_cast_fp16")]; + tensor var_443_pad_type_0 = const()[name = tensor("op_443_pad_type_0"), val = tensor("valid")]; + tensor var_443_strides_0 = const()[name = tensor("op_443_strides_0"), val = tensor([1, 1])]; + tensor var_443_pad_0 = const()[name = tensor("op_443_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_443_dilations_0 = const()[name = tensor("op_443_dilations_0"), val = tensor([1, 1])]; + tensor var_443_groups_0 = const()[name = tensor("op_443_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153007552))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152960896))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_443_cast_fp16 = conv(dilations = var_443_dilations_0, groups = var_443_groups_0, pad = var_443_pad_0, pad_type = var_443_pad_type_0, strides = var_443_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor current_key_3_cast_fp16 = add(x = var_437_cast_fp16, y = var_443_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor var_453_pad_type_0 = const()[name = tensor("op_453_pad_type_0"), val = tensor("valid")]; + tensor var_453_strides_0 = const()[name = tensor("op_453_strides_0"), val = tensor([1, 1])]; + tensor var_453_pad_0 = const()[name = tensor("op_453_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_453_dilations_0 = const()[name = tensor("op_453_dilations_0"), val = tensor([1, 1])]; + tensor var_453_groups_0 = const()[name = tensor("op_453_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153212416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154031680))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154031808)))]; + tensor var_453_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_453_dilations_0, groups = var_453_groups_0, pad = var_453_pad_0, pad_type = var_453_pad_type_0, strides = var_453_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_453_cast_fp16")]; + tensor var_459_pad_type_0 = const()[name = tensor("op_459_pad_type_0"), val = tensor("valid")]; + tensor var_459_strides_0 = const()[name = tensor("op_459_strides_0"), val = tensor([1, 1])]; + tensor var_459_pad_0 = const()[name = tensor("op_459_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_459_dilations_0 = const()[name = tensor("op_459_dilations_0"), val = tensor([1, 1])]; + tensor var_459_groups_0 = const()[name = tensor("op_459_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154057088))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154034432))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_459_cast_fp16 = conv(dilations = var_459_dilations_0, groups = var_459_groups_0, pad = var_459_pad_0, pad_type = var_459_pad_type_0, strides = var_459_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_459_cast_fp16")]; + tensor current_value_3_cast_fp16 = add(x = var_453_cast_fp16, y = var_459_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_465_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_465_cast_fp16")]; + tensor var_467_cast_fp16 = mul(x = var_53_cast_fp16_1, y = var_162_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_465_cast_fp16, y = var_467_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_469_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_469_cast_fp16")]; + tensor var_471_cast_fp16 = mul(x = var_60_cast_fp16_1, y = var_162_cast_fp16)[name = tensor("op_471_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_469_cast_fp16, y = var_471_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_474 = const()[name = tensor("op_474"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_474, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_476_to_fp16 = const()[name = tensor("op_476_to_fp16"), val = tensor(0x1p-3)]; + tensor var_477_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_476_to_fp16)[name = tensor("op_477_cast_fp16")]; + tensor var_478 = const()[name = tensor("op_478"), val = tensor([1, 20, 64, -1])]; + tensor var_479_cast_fp16 = reshape(shape = var_478, x = key_5_cast_fp16)[name = tensor("op_479_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_477_cast_fp16, y = var_479_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_180_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_487_cast_fp16 = softmax(axis = var_374, x = mh_w_9_cast_fp16)[name = tensor("op_487_cast_fp16")]; + tensor var_488 = const()[name = tensor("op_488"), val = tensor([1, 20, 64, -1])]; + tensor var_489_cast_fp16 = reshape(shape = var_488, x = value_5_cast_fp16)[name = tensor("op_489_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_489_cast_fp16, y = var_487_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1280, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_492, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("valid")]; + tensor var_502_strides_0 = const()[name = tensor("op_502_strides_0"), val = tensor([1, 1])]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_dilations_0 = const()[name = tensor("op_502_dilations_0"), val = tensor([1, 1])]; + tensor var_502_groups_0 = const()[name = tensor("op_502_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154261952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155081216))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155081344)))]; + tensor var_502_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_502_dilations_0, groups = var_502_groups_0, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_502_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_508_pad_type_0 = const()[name = tensor("op_508_pad_type_0"), val = tensor("valid")]; + tensor var_508_strides_0 = const()[name = tensor("op_508_strides_0"), val = tensor([1, 1])]; + tensor var_508_pad_0 = const()[name = tensor("op_508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_508_dilations_0 = const()[name = tensor("op_508_dilations_0"), val = tensor([1, 1])]; + tensor var_508_groups_0 = const()[name = tensor("op_508_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155108416))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155083968))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_508_cast_fp16 = conv(dilations = var_508_dilations_0, groups = var_508_groups_0, pad = var_508_pad_0, pad_type = var_508_pad_type_0, strides = var_508_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_508_cast_fp16")]; + tensor obj_21_cast_fp16 = add(x = var_502_cast_fp16, y = var_508_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_523_to_fp16 = const()[name = tensor("op_523_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_523_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155313280)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155315904)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_545_pad_type_0 = const()[name = tensor("op_545_pad_type_0"), val = tensor("valid")]; + tensor var_545_strides_0 = const()[name = tensor("op_545_strides_0"), val = tensor([1, 1])]; + tensor var_545_pad_0 = const()[name = tensor("op_545_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_545_dilations_0 = const()[name = tensor("op_545_dilations_0"), val = tensor([1, 1])]; + tensor var_545_groups_0 = const()[name = tensor("op_545_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155318528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156137792))), name = tensor("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156137920)))]; + tensor var_545_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_545_dilations_0, groups = var_545_groups_0, pad = var_545_pad_0, pad_type = var_545_pad_type_0, strides = var_545_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_23_cast_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_551_pad_type_0 = const()[name = tensor("op_551_pad_type_0"), val = tensor("valid")]; + tensor var_551_strides_0 = const()[name = tensor("op_551_strides_0"), val = tensor([1, 1])]; + tensor var_551_pad_0 = const()[name = tensor("op_551_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_551_dilations_0 = const()[name = tensor("op_551_dilations_0"), val = tensor([1, 1])]; + tensor var_551_groups_0 = const()[name = tensor("op_551_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156183616))), name = tensor("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156140544))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_551_cast_fp16 = conv(dilations = var_551_dilations_0, groups = var_551_groups_0, pad = var_551_pad_0, pad_type = var_551_pad_type_0, strides = var_551_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_23_cast_fp16)[name = tensor("op_551_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_545_cast_fp16, y = var_551_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_560_pad_type_0 = const()[name = tensor("op_560_pad_type_0"), val = tensor("valid")]; + tensor var_560_strides_0 = const()[name = tensor("op_560_strides_0"), val = tensor([1, 1])]; + tensor var_560_pad_0 = const()[name = tensor("op_560_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_560_dilations_0 = const()[name = tensor("op_560_dilations_0"), val = tensor([1, 1])]; + tensor var_560_groups_0 = const()[name = tensor("op_560_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156388480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157207744))), name = tensor("layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_560_cast_fp16 = conv(dilations = var_560_dilations_0, groups = var_560_groups_0, pad = var_560_pad_0, pad_type = var_560_pad_type_0, strides = var_560_strides_0, weight = layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_560_cast_fp16")]; + tensor var_566_pad_type_0 = const()[name = tensor("op_566_pad_type_0"), val = tensor("valid")]; + tensor var_566_strides_0 = const()[name = tensor("op_566_strides_0"), val = tensor([1, 1])]; + tensor var_566_pad_0 = const()[name = tensor("op_566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_566_dilations_0 = const()[name = tensor("op_566_dilations_0"), val = tensor([1, 1])]; + tensor var_566_groups_0 = const()[name = tensor("op_566_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157251328))), name = tensor("layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157207872))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_566_cast_fp16 = conv(dilations = var_566_dilations_0, groups = var_566_groups_0, pad = var_566_pad_0, pad_type = var_566_pad_type_0, strides = var_566_strides_0, weight = layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_566_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_560_cast_fp16, y = var_566_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_576_pad_type_0 = const()[name = tensor("op_576_pad_type_0"), val = tensor("valid")]; + tensor var_576_strides_0 = const()[name = tensor("op_576_strides_0"), val = tensor([1, 1])]; + tensor var_576_pad_0 = const()[name = tensor("op_576_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_576_dilations_0 = const()[name = tensor("op_576_dilations_0"), val = tensor([1, 1])]; + tensor var_576_groups_0 = const()[name = tensor("op_576_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157456192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158275456))), name = tensor("layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158275584)))]; + tensor var_576_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_576_dilations_0, groups = var_576_groups_0, pad = var_576_pad_0, pad_type = var_576_pad_type_0, strides = var_576_strides_0, weight = layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_576_cast_fp16")]; + tensor var_582_pad_type_0 = const()[name = tensor("op_582_pad_type_0"), val = tensor("valid")]; + tensor var_582_strides_0 = const()[name = tensor("op_582_strides_0"), val = tensor([1, 1])]; + tensor var_582_pad_0 = const()[name = tensor("op_582_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_582_dilations_0 = const()[name = tensor("op_582_dilations_0"), val = tensor([1, 1])]; + tensor var_582_groups_0 = const()[name = tensor("op_582_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158289408))), name = tensor("layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158278208))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_582_cast_fp16 = conv(dilations = var_582_dilations_0, groups = var_582_groups_0, pad = var_582_pad_0, pad_type = var_582_pad_type_0, strides = var_582_strides_0, weight = layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_582_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_576_cast_fp16, y = var_582_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_585 = const()[name = tensor("op_585"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_585, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1p-3)]; + tensor var_588_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor var_589 = const()[name = tensor("op_589"), val = tensor([1, 20, 64, -1])]; + tensor var_590_cast_fp16 = reshape(shape = var_589, x = key_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_588_cast_fp16, y = var_590_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_374, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_594 = const()[name = tensor("op_594"), val = tensor([1, 20, 64, -1])]; + tensor var_595_cast_fp16 = reshape(shape = var_594, x = value_7_cast_fp16)[name = tensor("op_595_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_595_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_598 = const()[name = tensor("op_598"), val = tensor([1, 1280, 1, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_598, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("valid")]; + tensor var_608_strides_0 = const()[name = tensor("op_608_strides_0"), val = tensor([1, 1])]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_dilations_0 = const()[name = tensor("op_608_dilations_0"), val = tensor([1, 1])]; + tensor var_608_groups_0 = const()[name = tensor("op_608_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158494272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159313536))), name = tensor("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159313664)))]; + tensor var_608_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_608_dilations_0, groups = var_608_groups_0, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_608_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_614_pad_type_0 = const()[name = tensor("op_614_pad_type_0"), val = tensor("valid")]; + tensor var_614_strides_0 = const()[name = tensor("op_614_strides_0"), val = tensor([1, 1])]; + tensor var_614_pad_0 = const()[name = tensor("op_614_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_614_dilations_0 = const()[name = tensor("op_614_dilations_0"), val = tensor([1, 1])]; + tensor var_614_groups_0 = const()[name = tensor("op_614_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159326656))), name = tensor("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159316288))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_614_cast_fp16 = conv(dilations = var_614_dilations_0, groups = var_614_groups_0, pad = var_614_pad_0, pad_type = var_614_pad_type_0, strides = var_614_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor obj_25_cast_fp16 = add(x = var_608_cast_fp16, y = var_614_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_625_to_fp16 = const()[name = tensor("op_625_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_625_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159531520)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159534144)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_643_pad_type_0 = const()[name = tensor("op_643_pad_type_0"), val = tensor("valid")]; + tensor var_643_strides_0 = const()[name = tensor("op_643_strides_0"), val = tensor([1, 1])]; + tensor var_643_pad_0 = const()[name = tensor("op_643_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_643_dilations_0 = const()[name = tensor("op_643_dilations_0"), val = tensor([1, 1])]; + tensor var_643_groups_0 = const()[name = tensor("op_643_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159536768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162813632))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162813760)))]; + tensor var_643_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_643_dilations_0, groups = var_643_groups_0, pad = var_643_pad_0, pad_type = var_643_pad_type_0, strides = var_643_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_643_cast_fp16")]; + tensor var_649_pad_type_0 = const()[name = tensor("op_649_pad_type_0"), val = tensor("valid")]; + tensor var_649_strides_0 = const()[name = tensor("op_649_strides_0"), val = tensor([1, 1])]; + tensor var_649_pad_0 = const()[name = tensor("op_649_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_649_dilations_0 = const()[name = tensor("op_649_dilations_0"), val = tensor([1, 1])]; + tensor var_649_groups_0 = const()[name = tensor("op_649_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162909312))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162824064))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_649_cast_fp16 = conv(dilations = var_649_dilations_0, groups = var_649_groups_0, pad = var_649_pad_0, pad_type = var_649_pad_type_0, strides = var_649_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_649_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = var_643_cast_fp16, y = var_649_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_660_pad_type_0 = const()[name = tensor("op_660_pad_type_0"), val = tensor("valid")]; + tensor var_660_strides_0 = const()[name = tensor("op_660_strides_0"), val = tensor([1, 1])]; + tensor var_660_pad_0 = const()[name = tensor("op_660_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_660_dilations_0 = const()[name = tensor("op_660_dilations_0"), val = tensor([1, 1])]; + tensor var_660_groups_0 = const()[name = tensor("op_660_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163728576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167005440))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167005568)))]; + tensor var_660_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_660_dilations_0, groups = var_660_groups_0, pad = var_660_pad_0, pad_type = var_660_pad_type_0, strides = var_660_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_666_pad_type_0 = const()[name = tensor("op_666_pad_type_0"), val = tensor("valid")]; + tensor var_666_strides_0 = const()[name = tensor("op_666_strides_0"), val = tensor([1, 1])]; + tensor var_666_pad_0 = const()[name = tensor("op_666_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_666_dilations_0 = const()[name = tensor("op_666_dilations_0"), val = tensor([1, 1])]; + tensor var_666_groups_0 = const()[name = tensor("op_666_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167096192))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167008192))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_666_cast_fp16 = conv(dilations = var_666_dilations_0, groups = var_666_groups_0, pad = var_666_pad_0, pad_type = var_666_pad_type_0, strides = var_666_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_666_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_660_cast_fp16, y = var_666_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_678 = const()[name = tensor("op_678"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_704_to_fp16 = const()[name = tensor("op_704_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_704_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167915456)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167918080)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_726_pad_type_0 = const()[name = tensor("op_726_pad_type_0"), val = tensor("valid")]; + tensor var_726_strides_0 = const()[name = tensor("op_726_strides_0"), val = tensor([1, 1])]; + tensor var_726_pad_0 = const()[name = tensor("op_726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_726_dilations_0 = const()[name = tensor("op_726_dilations_0"), val = tensor([1, 1])]; + tensor var_726_groups_0 = const()[name = tensor("op_726_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167920704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168739968))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168740096)))]; + tensor var_726_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_726_dilations_0, groups = var_726_groups_0, pad = var_726_pad_0, pad_type = var_726_pad_type_0, strides = var_726_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_726_cast_fp16")]; + tensor var_732_pad_type_0 = const()[name = tensor("op_732_pad_type_0"), val = tensor("valid")]; + tensor var_732_strides_0 = const()[name = tensor("op_732_strides_0"), val = tensor([1, 1])]; + tensor var_732_pad_0 = const()[name = tensor("op_732_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_732_dilations_0 = const()[name = tensor("op_732_dilations_0"), val = tensor([1, 1])]; + tensor var_732_groups_0 = const()[name = tensor("op_732_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168774976))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168742720))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_732_cast_fp16 = conv(dilations = var_732_dilations_0, groups = var_732_groups_0, pad = var_732_pad_0, pad_type = var_732_pad_type_0, strides = var_732_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_732_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_726_cast_fp16, y = var_732_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_741_pad_type_0 = const()[name = tensor("op_741_pad_type_0"), val = tensor("valid")]; + tensor var_741_strides_0 = const()[name = tensor("op_741_strides_0"), val = tensor([1, 1])]; + tensor var_741_pad_0 = const()[name = tensor("op_741_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_741_dilations_0 = const()[name = tensor("op_741_dilations_0"), val = tensor([1, 1])]; + tensor var_741_groups_0 = const()[name = tensor("op_741_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168979840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169799104))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_741_cast_fp16 = conv(dilations = var_741_dilations_0, groups = var_741_groups_0, pad = var_741_pad_0, pad_type = var_741_pad_type_0, strides = var_741_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_741_cast_fp16")]; + tensor var_747_pad_type_0 = const()[name = tensor("op_747_pad_type_0"), val = tensor("valid")]; + tensor var_747_strides_0 = const()[name = tensor("op_747_strides_0"), val = tensor([1, 1])]; + tensor var_747_pad_0 = const()[name = tensor("op_747_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_747_dilations_0 = const()[name = tensor("op_747_dilations_0"), val = tensor([1, 1])]; + tensor var_747_groups_0 = const()[name = tensor("op_747_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169836736))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169799232))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_747_cast_fp16 = conv(dilations = var_747_dilations_0, groups = var_747_groups_0, pad = var_747_pad_0, pad_type = var_747_pad_type_0, strides = var_747_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_747_cast_fp16")]; + tensor current_key_5_cast_fp16 = add(x = var_741_cast_fp16, y = var_747_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor var_757_pad_type_0 = const()[name = tensor("op_757_pad_type_0"), val = tensor("valid")]; + tensor var_757_strides_0 = const()[name = tensor("op_757_strides_0"), val = tensor([1, 1])]; + tensor var_757_pad_0 = const()[name = tensor("op_757_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_757_dilations_0 = const()[name = tensor("op_757_dilations_0"), val = tensor([1, 1])]; + tensor var_757_groups_0 = const()[name = tensor("op_757_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170041600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170860864))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170860992)))]; + tensor var_757_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_757_dilations_0, groups = var_757_groups_0, pad = var_757_pad_0, pad_type = var_757_pad_type_0, strides = var_757_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_763_pad_type_0 = const()[name = tensor("op_763_pad_type_0"), val = tensor("valid")]; + tensor var_763_strides_0 = const()[name = tensor("op_763_strides_0"), val = tensor([1, 1])]; + tensor var_763_pad_0 = const()[name = tensor("op_763_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_763_dilations_0 = const()[name = tensor("op_763_dilations_0"), val = tensor([1, 1])]; + tensor var_763_groups_0 = const()[name = tensor("op_763_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170876544))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170863616))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_763_cast_fp16 = conv(dilations = var_763_dilations_0, groups = var_763_groups_0, pad = var_763_pad_0, pad_type = var_763_pad_type_0, strides = var_763_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor current_value_5_cast_fp16 = add(x = var_757_cast_fp16, y = var_763_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_769_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_771_cast_fp16 = mul(x = var_53_cast_fp16_2, y = var_162_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_769_cast_fp16, y = var_771_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_773_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_775_cast_fp16 = mul(x = var_60_cast_fp16_2, y = var_162_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_773_cast_fp16, y = var_775_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_778 = const()[name = tensor("op_778"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_778, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_780_to_fp16 = const()[name = tensor("op_780_to_fp16"), val = tensor(0x1p-3)]; + tensor var_781_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_780_to_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor([1, 20, 64, -1])]; + tensor var_783_cast_fp16 = reshape(shape = var_782, x = key_9_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_781_cast_fp16, y = var_783_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_180_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_791_cast_fp16 = softmax(axis = var_678, x = mh_w_15_cast_fp16)[name = tensor("op_791_cast_fp16")]; + tensor var_792 = const()[name = tensor("op_792"), val = tensor([1, 20, 64, -1])]; + tensor var_793_cast_fp16 = reshape(shape = var_792, x = value_9_cast_fp16)[name = tensor("op_793_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_793_cast_fp16, y = var_791_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_796 = const()[name = tensor("op_796"), val = tensor([1, 1280, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_796, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_806_pad_type_0 = const()[name = tensor("op_806_pad_type_0"), val = tensor("valid")]; + tensor var_806_strides_0 = const()[name = tensor("op_806_strides_0"), val = tensor([1, 1])]; + tensor var_806_pad_0 = const()[name = tensor("op_806_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_806_dilations_0 = const()[name = tensor("op_806_dilations_0"), val = tensor([1, 1])]; + tensor var_806_groups_0 = const()[name = tensor("op_806_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171081408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171900672))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171900800)))]; + tensor var_806_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_806_dilations_0, groups = var_806_groups_0, pad = var_806_pad_0, pad_type = var_806_pad_type_0, strides = var_806_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = tensor("op_806_cast_fp16")]; + tensor var_812_pad_type_0 = const()[name = tensor("op_812_pad_type_0"), val = tensor("valid")]; + tensor var_812_strides_0 = const()[name = tensor("op_812_strides_0"), val = tensor([1, 1])]; + tensor var_812_pad_0 = const()[name = tensor("op_812_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_812_dilations_0 = const()[name = tensor("op_812_dilations_0"), val = tensor([1, 1])]; + tensor var_812_groups_0 = const()[name = tensor("op_812_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171914880))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171903424))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_812_cast_fp16 = conv(dilations = var_812_dilations_0, groups = var_812_groups_0, pad = var_812_pad_0, pad_type = var_812_pad_type_0, strides = var_812_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = tensor("op_812_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_806_cast_fp16, y = var_812_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_827_to_fp16 = const()[name = tensor("op_827_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_827_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172119744)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172122368)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_849_pad_type_0 = const()[name = tensor("op_849_pad_type_0"), val = tensor("valid")]; + tensor var_849_strides_0 = const()[name = tensor("op_849_strides_0"), val = tensor([1, 1])]; + tensor var_849_pad_0 = const()[name = tensor("op_849_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_849_dilations_0 = const()[name = tensor("op_849_dilations_0"), val = tensor([1, 1])]; + tensor var_849_groups_0 = const()[name = tensor("op_849_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172124992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172944256))), name = tensor("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172944384)))]; + tensor var_849_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_849_dilations_0, groups = var_849_groups_0, pad = var_849_pad_0, pad_type = var_849_pad_type_0, strides = var_849_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_849_cast_fp16")]; + tensor var_855_pad_type_0 = const()[name = tensor("op_855_pad_type_0"), val = tensor("valid")]; + tensor var_855_strides_0 = const()[name = tensor("op_855_strides_0"), val = tensor([1, 1])]; + tensor var_855_pad_0 = const()[name = tensor("op_855_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_855_dilations_0 = const()[name = tensor("op_855_dilations_0"), val = tensor([1, 1])]; + tensor var_855_groups_0 = const()[name = tensor("op_855_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172974720))), name = tensor("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172947008))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_855_cast_fp16 = conv(dilations = var_855_dilations_0, groups = var_855_groups_0, pad = var_855_pad_0, pad_type = var_855_pad_type_0, strides = var_855_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_855_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_849_cast_fp16, y = var_855_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_864_pad_type_0 = const()[name = tensor("op_864_pad_type_0"), val = tensor("valid")]; + tensor var_864_strides_0 = const()[name = tensor("op_864_strides_0"), val = tensor([1, 1])]; + tensor var_864_pad_0 = const()[name = tensor("op_864_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_864_dilations_0 = const()[name = tensor("op_864_dilations_0"), val = tensor([1, 1])]; + tensor var_864_groups_0 = const()[name = tensor("op_864_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173179584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173998848))), name = tensor("layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_864_cast_fp16 = conv(dilations = var_864_dilations_0, groups = var_864_groups_0, pad = var_864_pad_0, pad_type = var_864_pad_type_0, strides = var_864_strides_0, weight = layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_864_cast_fp16")]; + tensor var_870_pad_type_0 = const()[name = tensor("op_870_pad_type_0"), val = tensor("valid")]; + tensor var_870_strides_0 = const()[name = tensor("op_870_strides_0"), val = tensor([1, 1])]; + tensor var_870_pad_0 = const()[name = tensor("op_870_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_870_dilations_0 = const()[name = tensor("op_870_dilations_0"), val = tensor([1, 1])]; + tensor var_870_groups_0 = const()[name = tensor("op_870_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174026816))), name = tensor("layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173998976))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_870_cast_fp16 = conv(dilations = var_870_dilations_0, groups = var_870_groups_0, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_870_strides_0, weight = layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_870_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_864_cast_fp16, y = var_870_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_880_pad_type_0 = const()[name = tensor("op_880_pad_type_0"), val = tensor("valid")]; + tensor var_880_strides_0 = const()[name = tensor("op_880_strides_0"), val = tensor([1, 1])]; + tensor var_880_pad_0 = const()[name = tensor("op_880_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_880_dilations_0 = const()[name = tensor("op_880_dilations_0"), val = tensor([1, 1])]; + tensor var_880_groups_0 = const()[name = tensor("op_880_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174231680))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175050944))), name = tensor("layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175051072)))]; + tensor var_880_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_880_dilations_0, groups = var_880_groups_0, pad = var_880_pad_0, pad_type = var_880_pad_type_0, strides = var_880_strides_0, weight = layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_880_cast_fp16")]; + tensor var_886_pad_type_0 = const()[name = tensor("op_886_pad_type_0"), val = tensor("valid")]; + tensor var_886_strides_0 = const()[name = tensor("op_886_strides_0"), val = tensor([1, 1])]; + tensor var_886_pad_0 = const()[name = tensor("op_886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_886_dilations_0 = const()[name = tensor("op_886_dilations_0"), val = tensor([1, 1])]; + tensor var_886_groups_0 = const()[name = tensor("op_886_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175065280))), name = tensor("layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175053696))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_886_cast_fp16 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_886_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_880_cast_fp16, y = var_886_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_889 = const()[name = tensor("op_889"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_889, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_891_to_fp16 = const()[name = tensor("op_891_to_fp16"), val = tensor(0x1p-3)]; + tensor var_892_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_891_to_fp16)[name = tensor("op_892_cast_fp16")]; + tensor var_893 = const()[name = tensor("op_893"), val = tensor([1, 20, 64, -1])]; + tensor var_894_cast_fp16 = reshape(shape = var_893, x = key_11_cast_fp16)[name = tensor("op_894_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_892_cast_fp16, y = var_894_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_678, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 20, 64, -1])]; + tensor var_899_cast_fp16 = reshape(shape = var_898, x = value_11_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_899_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_902 = const()[name = tensor("op_902"), val = tensor([1, 1280, 1, -1])]; + tensor input_23_cast_fp16 = reshape(shape = var_902, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_912_pad_type_0 = const()[name = tensor("op_912_pad_type_0"), val = tensor("valid")]; + tensor var_912_strides_0 = const()[name = tensor("op_912_strides_0"), val = tensor([1, 1])]; + tensor var_912_pad_0 = const()[name = tensor("op_912_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_912_dilations_0 = const()[name = tensor("op_912_dilations_0"), val = tensor([1, 1])]; + tensor var_912_groups_0 = const()[name = tensor("op_912_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175270144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176089408))), name = tensor("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176089536)))]; + tensor var_912_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_912_dilations_0, groups = var_912_groups_0, pad = var_912_pad_0, pad_type = var_912_pad_type_0, strides = var_912_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor var_918_pad_type_0 = const()[name = tensor("op_918_pad_type_0"), val = tensor("valid")]; + tensor var_918_strides_0 = const()[name = tensor("op_918_strides_0"), val = tensor([1, 1])]; + tensor var_918_pad_0 = const()[name = tensor("op_918_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_918_dilations_0 = const()[name = tensor("op_918_dilations_0"), val = tensor([1, 1])]; + tensor var_918_groups_0 = const()[name = tensor("op_918_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176105152))), name = tensor("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176092160))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_918_cast_fp16 = conv(dilations = var_918_dilations_0, groups = var_918_groups_0, pad = var_918_pad_0, pad_type = var_918_pad_type_0, strides = var_918_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_918_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_912_cast_fp16, y = var_918_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_932_to_fp16 = const()[name = tensor("op_932_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_932_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176310016)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176312640)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_950_pad_type_0 = const()[name = tensor("op_950_pad_type_0"), val = tensor("valid")]; + tensor var_950_strides_0 = const()[name = tensor("op_950_strides_0"), val = tensor([1, 1])]; + tensor var_950_pad_0 = const()[name = tensor("op_950_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_950_dilations_0 = const()[name = tensor("op_950_dilations_0"), val = tensor([1, 1])]; + tensor var_950_groups_0 = const()[name = tensor("op_950_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176315264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179592128))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179592256)))]; + tensor var_950_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_950_dilations_0, groups = var_950_groups_0, pad = var_950_pad_0, pad_type = var_950_pad_type_0, strides = var_950_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_950_cast_fp16")]; + tensor var_956_pad_type_0 = const()[name = tensor("op_956_pad_type_0"), val = tensor("valid")]; + tensor var_956_strides_0 = const()[name = tensor("op_956_strides_0"), val = tensor([1, 1])]; + tensor var_956_pad_0 = const()[name = tensor("op_956_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_956_dilations_0 = const()[name = tensor("op_956_dilations_0"), val = tensor([1, 1])]; + tensor var_956_groups_0 = const()[name = tensor("op_956_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179764480))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179602560))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_956_cast_fp16 = conv(dilations = var_956_dilations_0, groups = var_956_groups_0, pad = var_956_pad_0, pad_type = var_956_pad_type_0, strides = var_956_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor input_27_cast_fp16 = add(x = var_950_cast_fp16, y = var_956_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_967_pad_type_0 = const()[name = tensor("op_967_pad_type_0"), val = tensor("valid")]; + tensor var_967_strides_0 = const()[name = tensor("op_967_strides_0"), val = tensor([1, 1])]; + tensor var_967_pad_0 = const()[name = tensor("op_967_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_967_dilations_0 = const()[name = tensor("op_967_dilations_0"), val = tensor([1, 1])]; + tensor var_967_groups_0 = const()[name = tensor("op_967_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180583744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183860608))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183860736)))]; + tensor var_967_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_967_dilations_0, groups = var_967_groups_0, pad = var_967_pad_0, pad_type = var_967_pad_type_0, strides = var_967_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = tensor("op_967_cast_fp16")]; + tensor var_973_pad_type_0 = const()[name = tensor("op_973_pad_type_0"), val = tensor("valid")]; + tensor var_973_strides_0 = const()[name = tensor("op_973_strides_0"), val = tensor([1, 1])]; + tensor var_973_pad_0 = const()[name = tensor("op_973_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_973_dilations_0 = const()[name = tensor("op_973_dilations_0"), val = tensor([1, 1])]; + tensor var_973_groups_0 = const()[name = tensor("op_973_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183943552))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183863360))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_973_cast_fp16 = conv(dilations = var_973_dilations_0, groups = var_973_groups_0, pad = var_973_pad_0, pad_type = var_973_pad_type_0, strides = var_973_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = tensor("op_973_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_967_cast_fp16, y = var_973_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_986 = const()[name = tensor("op_986"), val = tensor(3)]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1012_to_fp16 = const()[name = tensor("op_1012_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1012_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184762816)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184765440)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_1034_pad_type_0 = const()[name = tensor("op_1034_pad_type_0"), val = tensor("valid")]; + tensor var_1034_strides_0 = const()[name = tensor("op_1034_strides_0"), val = tensor([1, 1])]; + tensor var_1034_pad_0 = const()[name = tensor("op_1034_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1034_dilations_0 = const()[name = tensor("op_1034_dilations_0"), val = tensor([1, 1])]; + tensor var_1034_groups_0 = const()[name = tensor("op_1034_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184768064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185587328))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185587456)))]; + tensor var_1034_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1034_dilations_0, groups = var_1034_groups_0, pad = var_1034_pad_0, pad_type = var_1034_pad_type_0, strides = var_1034_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1034_cast_fp16")]; + tensor var_1040_pad_type_0 = const()[name = tensor("op_1040_pad_type_0"), val = tensor("valid")]; + tensor var_1040_strides_0 = const()[name = tensor("op_1040_strides_0"), val = tensor([1, 1])]; + tensor var_1040_pad_0 = const()[name = tensor("op_1040_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1040_dilations_0 = const()[name = tensor("op_1040_dilations_0"), val = tensor([1, 1])]; + tensor var_1040_groups_0 = const()[name = tensor("op_1040_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185611520))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185590080))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1040_cast_fp16 = conv(dilations = var_1040_dilations_0, groups = var_1040_groups_0, pad = var_1040_pad_0, pad_type = var_1040_pad_type_0, strides = var_1040_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1040_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1034_cast_fp16, y = var_1040_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("valid")]; + tensor var_1049_strides_0 = const()[name = tensor("op_1049_strides_0"), val = tensor([1, 1])]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_dilations_0 = const()[name = tensor("op_1049_dilations_0"), val = tensor([1, 1])]; + tensor var_1049_groups_0 = const()[name = tensor("op_1049_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185816384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186635648))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1049_dilations_0, groups = var_1049_groups_0, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1049_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor var_1055_pad_type_0 = const()[name = tensor("op_1055_pad_type_0"), val = tensor("valid")]; + tensor var_1055_strides_0 = const()[name = tensor("op_1055_strides_0"), val = tensor([1, 1])]; + tensor var_1055_pad_0 = const()[name = tensor("op_1055_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1055_dilations_0 = const()[name = tensor("op_1055_dilations_0"), val = tensor([1, 1])]; + tensor var_1055_groups_0 = const()[name = tensor("op_1055_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186656640))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186635776))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1055_cast_fp16 = conv(dilations = var_1055_dilations_0, groups = var_1055_groups_0, pad = var_1055_pad_0, pad_type = var_1055_pad_type_0, strides = var_1055_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1055_cast_fp16")]; + tensor current_key_cast_fp16 = add(x = var_1049_cast_fp16, y = var_1055_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor var_1065_pad_type_0 = const()[name = tensor("op_1065_pad_type_0"), val = tensor("valid")]; + tensor var_1065_strides_0 = const()[name = tensor("op_1065_strides_0"), val = tensor([1, 1])]; + tensor var_1065_pad_0 = const()[name = tensor("op_1065_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1065_dilations_0 = const()[name = tensor("op_1065_dilations_0"), val = tensor([1, 1])]; + tensor var_1065_groups_0 = const()[name = tensor("op_1065_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186861504))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187680768))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187680896)))]; + tensor var_1065_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1065_dilations_0, groups = var_1065_groups_0, pad = var_1065_pad_0, pad_type = var_1065_pad_type_0, strides = var_1065_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1065_cast_fp16")]; + tensor var_1071_pad_type_0 = const()[name = tensor("op_1071_pad_type_0"), val = tensor("valid")]; + tensor var_1071_strides_0 = const()[name = tensor("op_1071_strides_0"), val = tensor([1, 1])]; + tensor var_1071_pad_0 = const()[name = tensor("op_1071_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1071_dilations_0 = const()[name = tensor("op_1071_dilations_0"), val = tensor([1, 1])]; + tensor var_1071_groups_0 = const()[name = tensor("op_1071_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187698304))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187683520))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1071_cast_fp16 = conv(dilations = var_1071_dilations_0, groups = var_1071_groups_0, pad = var_1071_pad_0, pad_type = var_1071_pad_type_0, strides = var_1071_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1071_cast_fp16")]; + tensor current_value_cast_fp16 = add(x = var_1065_cast_fp16, y = var_1071_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_1077_cast_fp16 = mul(x = current_key_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_1077_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = var_53_cast_fp16_3, y = var_162_cast_fp16)[name = tensor("op_1079_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1077_cast_fp16, y = var_1079_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1081_cast_fp16 = mul(x = current_value_cast_fp16, y = var_159_cast_fp16)[name = tensor("op_1081_cast_fp16")]; + tensor var_1083_cast_fp16 = mul(x = var_60_cast_fp16_3, y = var_162_cast_fp16)[name = tensor("op_1083_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1081_cast_fp16, y = var_1083_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1086 = const()[name = tensor("op_1086"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1086, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1088_to_fp16 = const()[name = tensor("op_1088_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1089_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1088_to_fp16)[name = tensor("op_1089_cast_fp16")]; + tensor var_1090 = const()[name = tensor("op_1090"), val = tensor([1, 20, 64, -1])]; + tensor var_1091_cast_fp16 = reshape(shape = var_1090, x = key_13_cast_fp16)[name = tensor("op_1091_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1089_cast_fp16, y = var_1091_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_180_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_1099_cast_fp16 = softmax(axis = var_986, x = mh_w_21_cast_fp16)[name = tensor("op_1099_cast_fp16")]; + tensor var_1100 = const()[name = tensor("op_1100"), val = tensor([1, 20, 64, -1])]; + tensor var_1101_cast_fp16 = reshape(shape = var_1100, x = value_13_cast_fp16)[name = tensor("op_1101_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1101_cast_fp16, y = var_1099_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1104 = const()[name = tensor("op_1104"), val = tensor([1, 1280, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_1104, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_1114_pad_type_0 = const()[name = tensor("op_1114_pad_type_0"), val = tensor("valid")]; + tensor var_1114_strides_0 = const()[name = tensor("op_1114_strides_0"), val = tensor([1, 1])]; + tensor var_1114_pad_0 = const()[name = tensor("op_1114_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1114_dilations_0 = const()[name = tensor("op_1114_dilations_0"), val = tensor([1, 1])]; + tensor var_1114_groups_0 = const()[name = tensor("op_1114_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187903168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188722432))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188722560)))]; + tensor var_1114_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1114_dilations_0, groups = var_1114_groups_0, pad = var_1114_pad_0, pad_type = var_1114_pad_type_0, strides = var_1114_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_1114_cast_fp16")]; + tensor var_1120_pad_type_0 = const()[name = tensor("op_1120_pad_type_0"), val = tensor("valid")]; + tensor var_1120_strides_0 = const()[name = tensor("op_1120_strides_0"), val = tensor([1, 1])]; + tensor var_1120_pad_0 = const()[name = tensor("op_1120_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1120_dilations_0 = const()[name = tensor("op_1120_dilations_0"), val = tensor([1, 1])]; + tensor var_1120_groups_0 = const()[name = tensor("op_1120_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188739712))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188725184))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1120_cast_fp16 = conv(dilations = var_1120_dilations_0, groups = var_1120_groups_0, pad = var_1120_pad_0, pad_type = var_1120_pad_type_0, strides = var_1120_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_1120_cast_fp16")]; + tensor obj_49_cast_fp16 = add(x = var_1114_cast_fp16, y = var_1120_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1135_to_fp16 = const()[name = tensor("op_1135_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1135_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188944576)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188947200)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_1157_pad_type_0 = const()[name = tensor("op_1157_pad_type_0"), val = tensor("valid")]; + tensor var_1157_strides_0 = const()[name = tensor("op_1157_strides_0"), val = tensor([1, 1])]; + tensor var_1157_pad_0 = const()[name = tensor("op_1157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1157_dilations_0 = const()[name = tensor("op_1157_dilations_0"), val = tensor([1, 1])]; + tensor var_1157_groups_0 = const()[name = tensor("op_1157_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188949824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189769088))), name = tensor("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189769216)))]; + tensor var_1157_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1157_dilations_0, groups = var_1157_groups_0, pad = var_1157_pad_0, pad_type = var_1157_pad_type_0, strides = var_1157_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = tensor("op_1157_cast_fp16")]; + tensor var_1163_pad_type_0 = const()[name = tensor("op_1163_pad_type_0"), val = tensor("valid")]; + tensor var_1163_strides_0 = const()[name = tensor("op_1163_strides_0"), val = tensor([1, 1])]; + tensor var_1163_pad_0 = const()[name = tensor("op_1163_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1163_dilations_0 = const()[name = tensor("op_1163_dilations_0"), val = tensor([1, 1])]; + tensor var_1163_groups_0 = const()[name = tensor("op_1163_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189787264))), name = tensor("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189771840))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1163_cast_fp16 = conv(dilations = var_1163_dilations_0, groups = var_1163_groups_0, pad = var_1163_pad_0, pad_type = var_1163_pad_type_0, strides = var_1163_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = tensor("op_1163_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_1157_cast_fp16, y = var_1163_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_1172_pad_type_0 = const()[name = tensor("op_1172_pad_type_0"), val = tensor("valid")]; + tensor var_1172_strides_0 = const()[name = tensor("op_1172_strides_0"), val = tensor([1, 1])]; + tensor var_1172_pad_0 = const()[name = tensor("op_1172_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1172_dilations_0 = const()[name = tensor("op_1172_dilations_0"), val = tensor([1, 1])]; + tensor var_1172_groups_0 = const()[name = tensor("op_1172_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189992128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190811392))), name = tensor("layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1172_cast_fp16 = conv(dilations = var_1172_dilations_0, groups = var_1172_groups_0, pad = var_1172_pad_0, pad_type = var_1172_pad_type_0, strides = var_1172_strides_0, weight = layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1172_cast_fp16")]; + tensor var_1178_pad_type_0 = const()[name = tensor("op_1178_pad_type_0"), val = tensor("valid")]; + tensor var_1178_strides_0 = const()[name = tensor("op_1178_strides_0"), val = tensor([1, 1])]; + tensor var_1178_pad_0 = const()[name = tensor("op_1178_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1178_dilations_0 = const()[name = tensor("op_1178_dilations_0"), val = tensor([1, 1])]; + tensor var_1178_groups_0 = const()[name = tensor("op_1178_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190834240))), name = tensor("layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190811520))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1178_cast_fp16 = conv(dilations = var_1178_dilations_0, groups = var_1178_groups_0, pad = var_1178_pad_0, pad_type = var_1178_pad_type_0, strides = var_1178_strides_0, weight = layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1178_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_1172_cast_fp16, y = var_1178_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_1188_pad_type_0 = const()[name = tensor("op_1188_pad_type_0"), val = tensor("valid")]; + tensor var_1188_strides_0 = const()[name = tensor("op_1188_strides_0"), val = tensor([1, 1])]; + tensor var_1188_pad_0 = const()[name = tensor("op_1188_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1188_dilations_0 = const()[name = tensor("op_1188_dilations_0"), val = tensor([1, 1])]; + tensor var_1188_groups_0 = const()[name = tensor("op_1188_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191039104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191858368))), name = tensor("layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191858496)))]; + tensor var_1188_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1188_dilations_0, groups = var_1188_groups_0, pad = var_1188_pad_0, pad_type = var_1188_pad_type_0, strides = var_1188_strides_0, weight = layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1188_cast_fp16")]; + tensor var_1194_pad_type_0 = const()[name = tensor("op_1194_pad_type_0"), val = tensor("valid")]; + tensor var_1194_strides_0 = const()[name = tensor("op_1194_strides_0"), val = tensor([1, 1])]; + tensor var_1194_pad_0 = const()[name = tensor("op_1194_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1194_dilations_0 = const()[name = tensor("op_1194_dilations_0"), val = tensor([1, 1])]; + tensor var_1194_groups_0 = const()[name = tensor("op_1194_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191874944))), name = tensor("layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191861120))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1194_cast_fp16 = conv(dilations = var_1194_dilations_0, groups = var_1194_groups_0, pad = var_1194_pad_0, pad_type = var_1194_pad_type_0, strides = var_1194_strides_0, weight = layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1194_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_1188_cast_fp16, y = var_1194_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_1197 = const()[name = tensor("op_1197"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_1197, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_1199_to_fp16 = const()[name = tensor("op_1199_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1200_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_1199_to_fp16)[name = tensor("op_1200_cast_fp16")]; + tensor var_1201 = const()[name = tensor("op_1201"), val = tensor([1, 20, 64, -1])]; + tensor var_1202_cast_fp16 = reshape(shape = var_1201, x = key_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_1200_cast_fp16, y = var_1202_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_986, x = mh_w_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_1206 = const()[name = tensor("op_1206"), val = tensor([1, 20, 64, -1])]; + tensor var_1207_cast_fp16 = reshape(shape = var_1206, x = value_cast_fp16)[name = tensor("op_1207_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_1207_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_1210 = const()[name = tensor("op_1210"), val = tensor([1, 1280, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1210, x = attn_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1220_pad_type_0 = const()[name = tensor("op_1220_pad_type_0"), val = tensor("valid")]; + tensor var_1220_strides_0 = const()[name = tensor("op_1220_strides_0"), val = tensor([1, 1])]; + tensor var_1220_pad_0 = const()[name = tensor("op_1220_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1220_dilations_0 = const()[name = tensor("op_1220_dilations_0"), val = tensor([1, 1])]; + tensor var_1220_groups_0 = const()[name = tensor("op_1220_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192079808))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192899072))), name = tensor("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192899200)))]; + tensor var_1220_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1220_dilations_0, groups = var_1220_groups_0, pad = var_1220_pad_0, pad_type = var_1220_pad_type_0, strides = var_1220_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1220_cast_fp16")]; + tensor var_1226_pad_type_0 = const()[name = tensor("op_1226_pad_type_0"), val = tensor("valid")]; + tensor var_1226_strides_0 = const()[name = tensor("op_1226_strides_0"), val = tensor([1, 1])]; + tensor var_1226_pad_0 = const()[name = tensor("op_1226_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1226_dilations_0 = const()[name = tensor("op_1226_dilations_0"), val = tensor([1, 1])]; + tensor var_1226_groups_0 = const()[name = tensor("op_1226_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192913536))), name = tensor("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192901824))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1226_cast_fp16 = conv(dilations = var_1226_dilations_0, groups = var_1226_groups_0, pad = var_1226_pad_0, pad_type = var_1226_pad_type_0, strides = var_1226_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor obj_53_cast_fp16 = add(x = var_1220_cast_fp16, y = var_1226_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1240_to_fp16 = const()[name = tensor("op_1240_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1240_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193118400)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193121024)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1258_pad_type_0 = const()[name = tensor("op_1258_pad_type_0"), val = tensor("valid")]; + tensor var_1258_strides_0 = const()[name = tensor("op_1258_strides_0"), val = tensor([1, 1])]; + tensor var_1258_pad_0 = const()[name = tensor("op_1258_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1258_dilations_0 = const()[name = tensor("op_1258_dilations_0"), val = tensor([1, 1])]; + tensor var_1258_groups_0 = const()[name = tensor("op_1258_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193123648))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196400512))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196400640)))]; + tensor var_1258_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1258_dilations_0, groups = var_1258_groups_0, pad = var_1258_pad_0, pad_type = var_1258_pad_type_0, strides = var_1258_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1264_pad_type_0 = const()[name = tensor("op_1264_pad_type_0"), val = tensor("valid")]; + tensor var_1264_strides_0 = const()[name = tensor("op_1264_strides_0"), val = tensor([1, 1])]; + tensor var_1264_pad_0 = const()[name = tensor("op_1264_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1264_dilations_0 = const()[name = tensor("op_1264_dilations_0"), val = tensor([1, 1])]; + tensor var_1264_groups_0 = const()[name = tensor("op_1264_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196463680))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196410944))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1264_cast_fp16 = conv(dilations = var_1264_dilations_0, groups = var_1264_groups_0, pad = var_1264_pad_0, pad_type = var_1264_pad_type_0, strides = var_1264_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1258_cast_fp16, y = var_1264_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1275_pad_type_0 = const()[name = tensor("op_1275_pad_type_0"), val = tensor("valid")]; + tensor var_1275_strides_0 = const()[name = tensor("op_1275_strides_0"), val = tensor([1, 1])]; + tensor var_1275_pad_0 = const()[name = tensor("op_1275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1275_dilations_0 = const()[name = tensor("op_1275_dilations_0"), val = tensor([1, 1])]; + tensor var_1275_groups_0 = const()[name = tensor("op_1275_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197282944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202198208))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202198400)))]; + tensor var_1275_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1275_dilations_0, groups = var_1275_groups_0, pad = var_1275_pad_0, pad_type = var_1275_pad_type_0, strides = var_1275_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_1275_cast_fp16")]; + tensor var_1281_pad_type_0 = const()[name = tensor("op_1281_pad_type_0"), val = tensor("valid")]; + tensor var_1281_strides_0 = const()[name = tensor("op_1281_strides_0"), val = tensor([1, 1])]; + tensor var_1281_pad_0 = const()[name = tensor("op_1281_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1281_dilations_0 = const()[name = tensor("op_1281_dilations_0"), val = tensor([1, 1])]; + tensor var_1281_groups_0 = const()[name = tensor("op_1281_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202271552))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202201024))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1281_cast_fp16 = conv(dilations = var_1281_dilations_0, groups = var_1281_groups_0, pad = var_1281_pad_0, pad_type = var_1281_pad_type_0, strides = var_1281_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_1281_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_1275_cast_fp16, y = var_1281_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_1301_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203090816)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203093440)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_1312_axes_0 = const()[name = tensor("op_1312_axes_0"), val = tensor([2])]; + tensor var_1312_cast_fp16 = squeeze(axes = var_1312_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1315_perm_0 = const()[name = tensor("op_1315_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203096064)))]; + tensor var_1315_cast_fp16 = transpose(perm = var_1315_perm_0, x = var_1312_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_1315_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor var_1319 = const()[name = tensor("op_1319"), val = tensor(1)]; + tensor obj_59_interleave_0 = const()[name = tensor("obj_59_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_1319, interleave = obj_59_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = tensor("obj_59_cast_fp16")]; + tensor var_1322 = const()[name = tensor("op_1322"), val = tensor(1)]; + tensor obj_61_interleave_0 = const()[name = tensor("obj_61_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_1322, interleave = obj_61_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = tensor("obj_61_cast_fp16")]; + tensor var_1333_begin_0 = const()[name = tensor("op_1333_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_1333_end_0 = const()[name = tensor("op_1333_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_1333_end_mask_0 = const()[name = tensor("op_1333_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1333_cast_fp16 = slice_by_index(begin = var_1333_begin_0, end = var_1333_end_0, end_mask = var_1333_end_mask_0, x = obj_41_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1336_squeeze_mask_0 = const()[name = tensor("op_1336_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, squeeze_mask = var_1336_squeeze_mask_0, x = var_1333_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1351_begin_0 = const()[name = tensor("op_1351_begin_0"), val = tensor([0, 11, 0, 0])]; + tensor var_1351_end_0 = const()[name = tensor("op_1351_end_0"), val = tensor([1, 12, 1, 1500])]; + tensor var_1351_end_mask_0 = const()[name = tensor("op_1351_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1351_cast_fp16 = slice_by_index(begin = var_1351_begin_0, end = var_1351_end_0, end_mask = var_1351_end_mask_0, x = obj_41_cast_fp16)[name = tensor("op_1351_cast_fp16")]; + tensor var_1354_begin_0 = const()[name = tensor("op_1354_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1354_end_0 = const()[name = tensor("op_1354_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1354_end_mask_0 = const()[name = tensor("op_1354_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1354_squeeze_mask_0 = const()[name = tensor("op_1354_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1354_cast_fp16 = slice_by_index(begin = var_1354_begin_0, end = var_1354_end_0, end_mask = var_1354_end_mask_0, squeeze_mask = var_1354_squeeze_mask_0, x = var_1351_cast_fp16)[name = tensor("op_1354_cast_fp16")]; + tensor var_1369_begin_0 = const()[name = tensor("op_1369_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_1369_end_0 = const()[name = tensor("op_1369_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_1369_end_mask_0 = const()[name = tensor("op_1369_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = var_1369_end_0, end_mask = var_1369_end_mask_0, x = obj_55_cast_fp16)[name = tensor("op_1369_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1372_squeeze_mask_0 = const()[name = tensor("op_1372_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, squeeze_mask = var_1372_squeeze_mask_0, x = var_1369_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1387_begin_0 = const()[name = tensor("op_1387_begin_0"), val = tensor([0, 6, 0, 0])]; + tensor var_1387_end_0 = const()[name = tensor("op_1387_end_0"), val = tensor([1, 7, 1, 1500])]; + tensor var_1387_end_mask_0 = const()[name = tensor("op_1387_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1387_cast_fp16 = slice_by_index(begin = var_1387_begin_0, end = var_1387_end_0, end_mask = var_1387_end_mask_0, x = obj_55_cast_fp16)[name = tensor("op_1387_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1390_squeeze_mask_0 = const()[name = tensor("op_1390_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, squeeze_mask = var_1390_squeeze_mask_0, x = var_1387_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1405_begin_0 = const()[name = tensor("op_1405_begin_0"), val = tensor([0, 11, 0, 0])]; + tensor var_1405_end_0 = const()[name = tensor("op_1405_end_0"), val = tensor([1, 12, 1, 1500])]; + tensor var_1405_end_mask_0 = const()[name = tensor("op_1405_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1405_cast_fp16 = slice_by_index(begin = var_1405_begin_0, end = var_1405_end_0, end_mask = var_1405_end_mask_0, x = obj_55_cast_fp16)[name = tensor("op_1405_cast_fp16")]; + tensor var_1408_begin_0 = const()[name = tensor("op_1408_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1408_end_0 = const()[name = tensor("op_1408_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1408_end_mask_0 = const()[name = tensor("op_1408_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1408_squeeze_mask_0 = const()[name = tensor("op_1408_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1408_cast_fp16 = slice_by_index(begin = var_1408_begin_0, end = var_1408_end_0, end_mask = var_1408_end_mask_0, squeeze_mask = var_1408_squeeze_mask_0, x = var_1405_cast_fp16)[name = tensor("op_1408_cast_fp16")]; + tensor var_1423_begin_0 = const()[name = tensor("op_1423_begin_0"), val = tensor([0, 14, 0, 0])]; + tensor var_1423_end_0 = const()[name = tensor("op_1423_end_0"), val = tensor([1, 15, 1, 1500])]; + tensor var_1423_end_mask_0 = const()[name = tensor("op_1423_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1423_cast_fp16 = slice_by_index(begin = var_1423_begin_0, end = var_1423_end_0, end_mask = var_1423_end_mask_0, x = obj_55_cast_fp16)[name = tensor("op_1423_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1426_squeeze_mask_0 = const()[name = tensor("op_1426_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, squeeze_mask = var_1426_squeeze_mask_0, x = var_1423_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1433 = const()[name = tensor("op_1433"), val = tensor(1)]; + tensor var_1434_interleave_0 = const()[name = tensor("op_1434_interleave_0"), val = tensor(false)]; + tensor var_1434_cast_fp16 = concat(axis = var_1433, interleave = var_1434_interleave_0, values = (var_1336_cast_fp16, var_1354_cast_fp16, var_1372_cast_fp16, var_1390_cast_fp16, var_1408_cast_fp16, var_1426_cast_fp16))[name = tensor("op_1434_cast_fp16")]; + tensor var_1437 = const()[name = tensor("op_1437"), val = tensor(false)]; + tensor obj_axes_0 = const()[name = tensor("obj_axes_0"), val = tensor([1])]; + tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_1437, x = var_1434_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file