diff --git "a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil" @@ -0,0 +1,4245 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_116_pad_type_0 = const()[name = tensor("op_116_pad_type_0"), val = tensor("custom")]; + tensor var_116_pad_0 = const()[name = tensor("op_116_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_116_strides_0 = const()[name = tensor("op_116_strides_0"), val = tensor([1, 1])]; + tensor var_116_dilations_0 = const()[name = tensor("op_116_dilations_0"), val = tensor([1, 1])]; + tensor var_116_groups_0 = const()[name = tensor("op_116_groups_0"), val = tensor(1)]; + tensor var_85_to_fp16 = const()[name = tensor("op_85_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_97_to_fp16 = const()[name = tensor("op_97_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(983168)))]; + tensor var_116_cast_fp16 = conv(bias = var_97_to_fp16, dilations = var_116_dilations_0, groups = var_116_groups_0, pad = var_116_pad_0, pad_type = var_116_pad_type_0, strides = var_116_strides_0, weight = var_85_to_fp16, x = melspectrogram_features)[name = tensor("op_116_cast_fp16")]; + tensor var_154_pad_type_0 = const()[name = tensor("op_154_pad_type_0"), val = tensor("custom")]; + tensor var_154_pad_0 = const()[name = tensor("op_154_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_154_strides_0 = const()[name = tensor("op_154_strides_0"), val = tensor([1, 1])]; + tensor var_154_dilations_0 = const()[name = tensor("op_154_dilations_0"), val = tensor([1, 1])]; + tensor var_154_groups_0 = const()[name = tensor("op_154_groups_0"), val = tensor(1)]; + tensor op_129_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1231616))), name = tensor("op_129_to_fp16_palettized"), shape = tensor([1280, 128, 1, 3])]; + tensor var_135_to_fp16 = const()[name = tensor("op_135_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1231744)))]; + tensor var_154_cast_fp16 = conv(bias = var_135_to_fp16, dilations = var_154_dilations_0, groups = var_154_groups_0, pad = var_154_pad_0, pad_type = var_154_pad_type_0, strides = var_154_strides_0, weight = op_129_to_fp16_palettized, x = melspectrogram_features)[name = tensor("op_154_cast_fp16")]; + tensor var_156_cast_fp16 = add(x = var_116_cast_fp16, y = var_154_cast_fp16)[name = tensor("op_156_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_156_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_202_pad_type_0 = const()[name = tensor("op_202_pad_type_0"), val = tensor("custom")]; + tensor var_202_pad_0 = const()[name = tensor("op_202_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_202_strides_0 = const()[name = tensor("op_202_strides_0"), val = tensor([2, 2])]; + tensor var_202_dilations_0 = const()[name = tensor("op_202_dilations_0"), val = tensor([1, 1])]; + tensor var_202_groups_0 = const()[name = tensor("op_202_groups_0"), val = tensor(1)]; + tensor var_171_to_fp16 = const()[name = tensor("op_171_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234368)))]; + tensor var_202_cast_fp16 = conv(bias = var_97_to_fp16, dilations = var_202_dilations_0, groups = var_202_groups_0, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_202_strides_0, weight = var_171_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_202_cast_fp16")]; + tensor var_240_pad_type_0 = const()[name = tensor("op_240_pad_type_0"), val = tensor("custom")]; + tensor var_240_pad_0 = const()[name = tensor("op_240_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_240_strides_0 = const()[name = tensor("op_240_strides_0"), val = tensor([2, 2])]; + tensor var_240_dilations_0 = const()[name = tensor("op_240_dilations_0"), val = tensor([1, 1])]; + tensor var_240_groups_0 = const()[name = tensor("op_240_groups_0"), val = tensor(1)]; + tensor op_215_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11064832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13522496))), name = tensor("op_215_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 3])]; + tensor var_221_to_fp16 = const()[name = tensor("op_221_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13522624)))]; + tensor var_240_cast_fp16 = conv(bias = var_221_to_fp16, dilations = var_240_dilations_0, groups = var_240_groups_0, pad = var_240_pad_0, pad_type = var_240_pad_type_0, strides = var_240_strides_0, weight = op_215_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = tensor("op_240_cast_fp16")]; + tensor var_242_cast_fp16 = add(x = var_202_cast_fp16, y = var_240_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_242_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_262_to_fp16 = const()[name = tensor("op_262_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13525248)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_262_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_295_to_fp16 = const()[name = tensor("op_295_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_295_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17365312)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17367936)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17370560)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_317_pad_type_0 = const()[name = tensor("op_317_pad_type_0"), val = tensor("valid")]; + tensor var_317_strides_0 = const()[name = tensor("op_317_strides_0"), val = tensor([1, 1])]; + tensor var_317_pad_0 = const()[name = tensor("op_317_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_317_dilations_0 = const()[name = tensor("op_317_dilations_0"), val = tensor([1, 1])]; + tensor var_317_groups_0 = const()[name = tensor("op_317_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17373184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18192448))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18192576)))]; + tensor var_317_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_317_dilations_0, groups = var_317_groups_0, pad = var_317_pad_0, pad_type = var_317_pad_type_0, strides = var_317_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_317_cast_fp16")]; + tensor var_323_pad_type_0 = const()[name = tensor("op_323_pad_type_0"), val = tensor("valid")]; + tensor var_323_strides_0 = const()[name = tensor("op_323_strides_0"), val = tensor([1, 1])]; + tensor var_323_pad_0 = const()[name = tensor("op_323_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_323_dilations_0 = const()[name = tensor("op_323_dilations_0"), val = tensor([1, 1])]; + tensor var_323_groups_0 = const()[name = tensor("op_323_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18275392))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18195200))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_323_cast_fp16 = conv(dilations = var_323_dilations_0, groups = var_323_groups_0, pad = var_323_pad_0, pad_type = var_323_pad_type_0, strides = var_323_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_323_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_317_cast_fp16, y = var_323_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_332_pad_type_0 = const()[name = tensor("op_332_pad_type_0"), val = tensor("valid")]; + tensor var_332_strides_0 = const()[name = tensor("op_332_strides_0"), val = tensor([1, 1])]; + tensor var_332_pad_0 = const()[name = tensor("op_332_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_332_dilations_0 = const()[name = tensor("op_332_dilations_0"), val = tensor([1, 1])]; + tensor var_332_groups_0 = const()[name = tensor("op_332_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18480256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19299520))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_332_cast_fp16 = conv(dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_332_cast_fp16")]; + tensor var_338_pad_type_0 = const()[name = tensor("op_338_pad_type_0"), val = tensor("valid")]; + tensor var_338_strides_0 = const()[name = tensor("op_338_strides_0"), val = tensor([1, 1])]; + tensor var_338_pad_0 = const()[name = tensor("op_338_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_338_dilations_0 = const()[name = tensor("op_338_dilations_0"), val = tensor([1, 1])]; + tensor var_338_groups_0 = const()[name = tensor("op_338_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19363520))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19299648))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_338_cast_fp16 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_338_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_332_cast_fp16, y = var_338_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_348_pad_type_0 = const()[name = tensor("op_348_pad_type_0"), val = tensor("valid")]; + tensor var_348_strides_0 = const()[name = tensor("op_348_strides_0"), val = tensor([1, 1])]; + tensor var_348_pad_0 = const()[name = tensor("op_348_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_348_dilations_0 = const()[name = tensor("op_348_dilations_0"), val = tensor([1, 1])]; + tensor var_348_groups_0 = const()[name = tensor("op_348_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19568384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20387648))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20387776)))]; + tensor var_348_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_354_pad_type_0 = const()[name = tensor("op_354_pad_type_0"), val = tensor("valid")]; + tensor var_354_strides_0 = const()[name = tensor("op_354_strides_0"), val = tensor([1, 1])]; + tensor var_354_pad_0 = const()[name = tensor("op_354_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_354_dilations_0 = const()[name = tensor("op_354_dilations_0"), val = tensor([1, 1])]; + tensor var_354_groups_0 = const()[name = tensor("op_354_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20474048))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20390400))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_354_cast_fp16 = conv(dilations = var_354_dilations_0, groups = var_354_groups_0, pad = var_354_pad_0, pad_type = var_354_pad_type_0, strides = var_354_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_354_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_348_cast_fp16, y = var_354_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_357 = const()[name = tensor("op_357"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_357, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_359_to_fp16 = const()[name = tensor("op_359_to_fp16"), val = tensor(0x1p-3)]; + tensor var_360_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_359_to_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_361 = const()[name = tensor("op_361"), val = tensor([1, 20, 64, -1])]; + tensor var_362_cast_fp16 = reshape(shape = var_361, x = key_1_cast_fp16)[name = tensor("op_362_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_360_cast_fp16, y = var_362_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_365_cast_fp16 = softmax(axis = var_276, x = mh_w_1_cast_fp16)[name = tensor("op_365_cast_fp16")]; + tensor var_366 = const()[name = tensor("op_366"), val = tensor([1, 20, 64, -1])]; + tensor var_367_cast_fp16 = reshape(shape = var_366, x = value_1_cast_fp16)[name = tensor("op_367_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_367_cast_fp16, y = var_365_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_370 = const()[name = tensor("op_370"), val = tensor([1, 1280, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_370, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_380_pad_type_0 = const()[name = tensor("op_380_pad_type_0"), val = tensor("valid")]; + tensor var_380_strides_0 = const()[name = tensor("op_380_strides_0"), val = tensor([1, 1])]; + tensor var_380_pad_0 = const()[name = tensor("op_380_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_380_dilations_0 = const()[name = tensor("op_380_dilations_0"), val = tensor([1, 1])]; + tensor var_380_groups_0 = const()[name = tensor("op_380_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20678912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21498176))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21498304)))]; + tensor var_380_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_380_dilations_0, groups = var_380_groups_0, pad = var_380_pad_0, pad_type = var_380_pad_type_0, strides = var_380_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_386_pad_type_0 = const()[name = tensor("op_386_pad_type_0"), val = tensor("valid")]; + tensor var_386_strides_0 = const()[name = tensor("op_386_strides_0"), val = tensor([1, 1])]; + tensor var_386_pad_0 = const()[name = tensor("op_386_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_386_dilations_0 = const()[name = tensor("op_386_dilations_0"), val = tensor([1, 1])]; + tensor var_386_groups_0 = const()[name = tensor("op_386_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21553024))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21500928))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_386_cast_fp16 = conv(dilations = var_386_dilations_0, groups = var_386_groups_0, pad = var_386_pad_0, pad_type = var_386_pad_type_0, strides = var_386_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_386_cast_fp16")]; + tensor obj_3_cast_fp16 = add(x = var_380_cast_fp16, y = var_386_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_397_to_fp16 = const()[name = tensor("op_397_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_397_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21757888)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21760512)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_415_pad_type_0 = const()[name = tensor("op_415_pad_type_0"), val = tensor("valid")]; + tensor var_415_strides_0 = const()[name = tensor("op_415_strides_0"), val = tensor([1, 1])]; + tensor var_415_pad_0 = const()[name = tensor("op_415_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_415_dilations_0 = const()[name = tensor("op_415_dilations_0"), val = tensor([1, 1])]; + tensor var_415_groups_0 = const()[name = tensor("op_415_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21763136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25040000))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25040128)))]; + tensor var_415_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_415_dilations_0, groups = var_415_groups_0, pad = var_415_pad_0, pad_type = var_415_pad_type_0, strides = var_415_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_415_cast_fp16")]; + tensor var_421_pad_type_0 = const()[name = tensor("op_421_pad_type_0"), val = tensor("valid")]; + tensor var_421_strides_0 = const()[name = tensor("op_421_strides_0"), val = tensor([1, 1])]; + tensor var_421_pad_0 = const()[name = tensor("op_421_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_421_dilations_0 = const()[name = tensor("op_421_dilations_0"), val = tensor([1, 1])]; + tensor var_421_groups_0 = const()[name = tensor("op_421_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25353152))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25050432))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_421_cast_fp16 = conv(dilations = var_421_dilations_0, groups = var_421_groups_0, pad = var_421_pad_0, pad_type = var_421_pad_type_0, strides = var_421_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_421_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = var_415_cast_fp16, y = var_421_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_432_pad_type_0 = const()[name = tensor("op_432_pad_type_0"), val = tensor("valid")]; + tensor var_432_strides_0 = const()[name = tensor("op_432_strides_0"), val = tensor([1, 1])]; + tensor var_432_pad_0 = const()[name = tensor("op_432_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_432_dilations_0 = const()[name = tensor("op_432_dilations_0"), val = tensor([1, 1])]; + tensor var_432_groups_0 = const()[name = tensor("op_432_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26172416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29449280))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29449408)))]; + tensor var_432_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_432_dilations_0, groups = var_432_groups_0, pad = var_432_pad_0, pad_type = var_432_pad_type_0, strides = var_432_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_438_pad_type_0 = const()[name = tensor("op_438_pad_type_0"), val = tensor("valid")]; + tensor var_438_strides_0 = const()[name = tensor("op_438_strides_0"), val = tensor([1, 1])]; + tensor var_438_pad_0 = const()[name = tensor("op_438_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_438_dilations_0 = const()[name = tensor("op_438_dilations_0"), val = tensor([1, 1])]; + tensor var_438_groups_0 = const()[name = tensor("op_438_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29689088))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29452032))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_438_cast_fp16 = conv(dilations = var_438_dilations_0, groups = var_438_groups_0, pad = var_438_pad_0, pad_type = var_438_pad_type_0, strides = var_438_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = tensor("op_438_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_432_cast_fp16, y = var_438_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_467_to_fp16 = const()[name = tensor("op_467_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_467_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30508352)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30510976)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_489_pad_type_0 = const()[name = tensor("op_489_pad_type_0"), val = tensor("valid")]; + tensor var_489_strides_0 = const()[name = tensor("op_489_strides_0"), val = tensor([1, 1])]; + tensor var_489_pad_0 = const()[name = tensor("op_489_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_489_dilations_0 = const()[name = tensor("op_489_dilations_0"), val = tensor([1, 1])]; + tensor var_489_groups_0 = const()[name = tensor("op_489_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30513600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31332864))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31332992)))]; + tensor var_489_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_489_dilations_0, groups = var_489_groups_0, pad = var_489_pad_0, pad_type = var_489_pad_type_0, strides = var_489_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_495_pad_type_0 = const()[name = tensor("op_495_pad_type_0"), val = tensor("valid")]; + tensor var_495_strides_0 = const()[name = tensor("op_495_strides_0"), val = tensor([1, 1])]; + tensor var_495_pad_0 = const()[name = tensor("op_495_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_495_dilations_0 = const()[name = tensor("op_495_dilations_0"), val = tensor([1, 1])]; + tensor var_495_groups_0 = const()[name = tensor("op_495_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31399552))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31335616))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_495_cast_fp16 = conv(dilations = var_495_dilations_0, groups = var_495_groups_0, pad = var_495_pad_0, pad_type = var_495_pad_type_0, strides = var_495_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_495_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_489_cast_fp16, y = var_495_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_504_pad_type_0 = const()[name = tensor("op_504_pad_type_0"), val = tensor("valid")]; + tensor var_504_strides_0 = const()[name = tensor("op_504_strides_0"), val = tensor([1, 1])]; + tensor var_504_pad_0 = const()[name = tensor("op_504_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_504_dilations_0 = const()[name = tensor("op_504_dilations_0"), val = tensor([1, 1])]; + tensor var_504_groups_0 = const()[name = tensor("op_504_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31604416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32423680))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_504_cast_fp16 = conv(dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_504_cast_fp16")]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("valid")]; + tensor var_510_strides_0 = const()[name = tensor("op_510_strides_0"), val = tensor([1, 1])]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_dilations_0 = const()[name = tensor("op_510_dilations_0"), val = tensor([1, 1])]; + tensor var_510_groups_0 = const()[name = tensor("op_510_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32475264))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32423808))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_510_cast_fp16 = conv(dilations = var_510_dilations_0, groups = var_510_groups_0, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_510_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_504_cast_fp16, y = var_510_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_520_pad_type_0 = const()[name = tensor("op_520_pad_type_0"), val = tensor("valid")]; + tensor var_520_strides_0 = const()[name = tensor("op_520_strides_0"), val = tensor([1, 1])]; + tensor var_520_pad_0 = const()[name = tensor("op_520_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_520_dilations_0 = const()[name = tensor("op_520_dilations_0"), val = tensor([1, 1])]; + tensor var_520_groups_0 = const()[name = tensor("op_520_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32680128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33499392))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33499520)))]; + tensor var_520_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_520_dilations_0, groups = var_520_groups_0, pad = var_520_pad_0, pad_type = var_520_pad_type_0, strides = var_520_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_520_cast_fp16")]; + tensor var_526_pad_type_0 = const()[name = tensor("op_526_pad_type_0"), val = tensor("valid")]; + tensor var_526_strides_0 = const()[name = tensor("op_526_strides_0"), val = tensor([1, 1])]; + tensor var_526_pad_0 = const()[name = tensor("op_526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_526_dilations_0 = const()[name = tensor("op_526_dilations_0"), val = tensor([1, 1])]; + tensor var_526_groups_0 = const()[name = tensor("op_526_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33549632))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33502144))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_526_cast_fp16 = conv(dilations = var_526_dilations_0, groups = var_526_groups_0, pad = var_526_pad_0, pad_type = var_526_pad_type_0, strides = var_526_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_520_cast_fp16, y = var_526_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_529 = const()[name = tensor("op_529"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_529, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_531_to_fp16 = const()[name = tensor("op_531_to_fp16"), val = tensor(0x1p-3)]; + tensor var_532_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_531_to_fp16)[name = tensor("op_532_cast_fp16")]; + tensor var_533 = const()[name = tensor("op_533"), val = tensor([1, 20, 64, -1])]; + tensor var_534_cast_fp16 = reshape(shape = var_533, x = key_3_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor mh_w_3_transpose_x_0 = const()[name = tensor("mh_w_3_transpose_x_0"), val = tensor(true)]; + tensor mh_w_3_transpose_y_0 = const()[name = tensor("mh_w_3_transpose_y_0"), val = tensor(false)]; + tensor mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_532_cast_fp16, y = var_534_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_537_cast_fp16 = softmax(axis = var_448, x = mh_w_3_cast_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_538 = const()[name = tensor("op_538"), val = tensor([1, 20, 64, -1])]; + tensor var_539_cast_fp16 = reshape(shape = var_538, x = value_3_cast_fp16)[name = tensor("op_539_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_539_cast_fp16, y = var_537_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_542 = const()[name = tensor("op_542"), val = tensor([1, 1280, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_542, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_552_pad_type_0 = const()[name = tensor("op_552_pad_type_0"), val = tensor("valid")]; + tensor var_552_strides_0 = const()[name = tensor("op_552_strides_0"), val = tensor([1, 1])]; + tensor var_552_pad_0 = const()[name = tensor("op_552_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_552_dilations_0 = const()[name = tensor("op_552_dilations_0"), val = tensor([1, 1])]; + tensor var_552_groups_0 = const()[name = tensor("op_552_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33754496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34573760))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34573888)))]; + tensor var_552_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_552_dilations_0, groups = var_552_groups_0, pad = var_552_pad_0, pad_type = var_552_pad_type_0, strides = var_552_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_552_cast_fp16")]; + tensor var_558_pad_type_0 = const()[name = tensor("op_558_pad_type_0"), val = tensor("valid")]; + tensor var_558_strides_0 = const()[name = tensor("op_558_strides_0"), val = tensor([1, 1])]; + tensor var_558_pad_0 = const()[name = tensor("op_558_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_558_dilations_0 = const()[name = tensor("op_558_dilations_0"), val = tensor([1, 1])]; + tensor var_558_groups_0 = const()[name = tensor("op_558_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34612736))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34576512))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_558_cast_fp16 = conv(dilations = var_558_dilations_0, groups = var_558_groups_0, pad = var_558_pad_0, pad_type = var_558_pad_type_0, strides = var_558_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_552_cast_fp16, y = var_558_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_569_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34817600)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34820224)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_587_pad_type_0 = const()[name = tensor("op_587_pad_type_0"), val = tensor("valid")]; + tensor var_587_strides_0 = const()[name = tensor("op_587_strides_0"), val = tensor([1, 1])]; + tensor var_587_pad_0 = const()[name = tensor("op_587_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_587_dilations_0 = const()[name = tensor("op_587_dilations_0"), val = tensor([1, 1])]; + tensor var_587_groups_0 = const()[name = tensor("op_587_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34822848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38099712))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38099840)))]; + tensor var_587_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_587_dilations_0, groups = var_587_groups_0, pad = var_587_pad_0, pad_type = var_587_pad_type_0, strides = var_587_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_587_cast_fp16")]; + tensor var_593_pad_type_0 = const()[name = tensor("op_593_pad_type_0"), val = tensor("valid")]; + tensor var_593_strides_0 = const()[name = tensor("op_593_strides_0"), val = tensor([1, 1])]; + tensor var_593_pad_0 = const()[name = tensor("op_593_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_593_dilations_0 = const()[name = tensor("op_593_dilations_0"), val = tensor([1, 1])]; + tensor var_593_groups_0 = const()[name = tensor("op_593_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38256320))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38110144))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_593_cast_fp16 = conv(dilations = var_593_dilations_0, groups = var_593_groups_0, pad = var_593_pad_0, pad_type = var_593_pad_type_0, strides = var_593_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_593_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_587_cast_fp16, y = var_593_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_604_pad_type_0 = const()[name = tensor("op_604_pad_type_0"), val = tensor("valid")]; + tensor var_604_strides_0 = const()[name = tensor("op_604_strides_0"), val = tensor([1, 1])]; + tensor var_604_pad_0 = const()[name = tensor("op_604_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_604_dilations_0 = const()[name = tensor("op_604_dilations_0"), val = tensor([1, 1])]; + tensor var_604_groups_0 = const()[name = tensor("op_604_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39075584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42352448))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42352576)))]; + tensor var_604_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_604_dilations_0, groups = var_604_groups_0, pad = var_604_pad_0, pad_type = var_604_pad_type_0, strides = var_604_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_610_pad_type_0 = const()[name = tensor("op_610_pad_type_0"), val = tensor("valid")]; + tensor var_610_strides_0 = const()[name = tensor("op_610_strides_0"), val = tensor([1, 1])]; + tensor var_610_pad_0 = const()[name = tensor("op_610_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_610_dilations_0 = const()[name = tensor("op_610_dilations_0"), val = tensor([1, 1])]; + tensor var_610_groups_0 = const()[name = tensor("op_610_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42567552))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42355200))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_610_cast_fp16 = conv(dilations = var_610_dilations_0, groups = var_610_groups_0, pad = var_610_pad_0, pad_type = var_610_pad_type_0, strides = var_610_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_604_cast_fp16, y = var_610_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_620 = const()[name = tensor("op_620"), val = tensor(3)]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_639_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43386816)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43389440)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_661_pad_type_0 = const()[name = tensor("op_661_pad_type_0"), val = tensor("valid")]; + tensor var_661_strides_0 = const()[name = tensor("op_661_strides_0"), val = tensor([1, 1])]; + tensor var_661_pad_0 = const()[name = tensor("op_661_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_661_dilations_0 = const()[name = tensor("op_661_dilations_0"), val = tensor([1, 1])]; + tensor var_661_groups_0 = const()[name = tensor("op_661_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43392064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44211328))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44211456)))]; + tensor var_661_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_661_dilations_0, groups = var_661_groups_0, pad = var_661_pad_0, pad_type = var_661_pad_type_0, strides = var_661_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_661_cast_fp16")]; + tensor var_667_pad_type_0 = const()[name = tensor("op_667_pad_type_0"), val = tensor("valid")]; + tensor var_667_strides_0 = const()[name = tensor("op_667_strides_0"), val = tensor([1, 1])]; + tensor var_667_pad_0 = const()[name = tensor("op_667_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_667_dilations_0 = const()[name = tensor("op_667_dilations_0"), val = tensor([1, 1])]; + tensor var_667_groups_0 = const()[name = tensor("op_667_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44271552))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44214080))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_667_cast_fp16 = conv(dilations = var_667_dilations_0, groups = var_667_groups_0, pad = var_667_pad_0, pad_type = var_667_pad_type_0, strides = var_667_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_667_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_661_cast_fp16, y = var_667_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_676_pad_type_0 = const()[name = tensor("op_676_pad_type_0"), val = tensor("valid")]; + tensor var_676_strides_0 = const()[name = tensor("op_676_strides_0"), val = tensor([1, 1])]; + tensor var_676_pad_0 = const()[name = tensor("op_676_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_676_dilations_0 = const()[name = tensor("op_676_dilations_0"), val = tensor([1, 1])]; + tensor var_676_groups_0 = const()[name = tensor("op_676_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44476416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45295680))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_676_cast_fp16 = conv(dilations = var_676_dilations_0, groups = var_676_groups_0, pad = var_676_pad_0, pad_type = var_676_pad_type_0, strides = var_676_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_682_pad_type_0 = const()[name = tensor("op_682_pad_type_0"), val = tensor("valid")]; + tensor var_682_strides_0 = const()[name = tensor("op_682_strides_0"), val = tensor([1, 1])]; + tensor var_682_pad_0 = const()[name = tensor("op_682_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_682_dilations_0 = const()[name = tensor("op_682_dilations_0"), val = tensor([1, 1])]; + tensor var_682_groups_0 = const()[name = tensor("op_682_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45344832))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45295808))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_682_cast_fp16 = conv(dilations = var_682_dilations_0, groups = var_682_groups_0, pad = var_682_pad_0, pad_type = var_682_pad_type_0, strides = var_682_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_682_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_676_cast_fp16, y = var_682_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_692_pad_type_0 = const()[name = tensor("op_692_pad_type_0"), val = tensor("valid")]; + tensor var_692_strides_0 = const()[name = tensor("op_692_strides_0"), val = tensor([1, 1])]; + tensor var_692_pad_0 = const()[name = tensor("op_692_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_692_dilations_0 = const()[name = tensor("op_692_dilations_0"), val = tensor([1, 1])]; + tensor var_692_groups_0 = const()[name = tensor("op_692_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45549696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46368960))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46369088)))]; + tensor var_692_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_692_dilations_0, groups = var_692_groups_0, pad = var_692_pad_0, pad_type = var_692_pad_type_0, strides = var_692_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_698_pad_type_0 = const()[name = tensor("op_698_pad_type_0"), val = tensor("valid")]; + tensor var_698_strides_0 = const()[name = tensor("op_698_strides_0"), val = tensor([1, 1])]; + tensor var_698_pad_0 = const()[name = tensor("op_698_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_698_dilations_0 = const()[name = tensor("op_698_dilations_0"), val = tensor([1, 1])]; + tensor var_698_groups_0 = const()[name = tensor("op_698_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46410240))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46371712))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_698_cast_fp16 = conv(dilations = var_698_dilations_0, groups = var_698_groups_0, pad = var_698_pad_0, pad_type = var_698_pad_type_0, strides = var_698_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_698_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_692_cast_fp16, y = var_698_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_701 = const()[name = tensor("op_701"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_701, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1p-3)]; + tensor var_704_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_705 = const()[name = tensor("op_705"), val = tensor([1, 20, 64, -1])]; + tensor var_706_cast_fp16 = reshape(shape = var_705, x = key_5_cast_fp16)[name = tensor("op_706_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_704_cast_fp16, y = var_706_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor var_709_cast_fp16 = softmax(axis = var_620, x = mh_w_5_cast_fp16)[name = tensor("op_709_cast_fp16")]; + tensor var_710 = const()[name = tensor("op_710"), val = tensor([1, 20, 64, -1])]; + tensor var_711_cast_fp16 = reshape(shape = var_710, x = value_5_cast_fp16)[name = tensor("op_711_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_711_cast_fp16, y = var_709_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_714 = const()[name = tensor("op_714"), val = tensor([1, 1280, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_714, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_724_pad_type_0 = const()[name = tensor("op_724_pad_type_0"), val = tensor("valid")]; + tensor var_724_strides_0 = const()[name = tensor("op_724_strides_0"), val = tensor([1, 1])]; + tensor var_724_pad_0 = const()[name = tensor("op_724_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_724_dilations_0 = const()[name = tensor("op_724_dilations_0"), val = tensor([1, 1])]; + tensor var_724_groups_0 = const()[name = tensor("op_724_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46615104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47434368))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47434496)))]; + tensor var_724_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_724_dilations_0, groups = var_724_groups_0, pad = var_724_pad_0, pad_type = var_724_pad_type_0, strides = var_724_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = tensor("op_724_cast_fp16")]; + tensor var_730_pad_type_0 = const()[name = tensor("op_730_pad_type_0"), val = tensor("valid")]; + tensor var_730_strides_0 = const()[name = tensor("op_730_strides_0"), val = tensor([1, 1])]; + tensor var_730_pad_0 = const()[name = tensor("op_730_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_730_dilations_0 = const()[name = tensor("op_730_dilations_0"), val = tensor([1, 1])]; + tensor var_730_groups_0 = const()[name = tensor("op_730_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47467328))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47437120))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_730_cast_fp16 = conv(dilations = var_730_dilations_0, groups = var_730_groups_0, pad = var_730_pad_0, pad_type = var_730_pad_type_0, strides = var_730_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = tensor("op_730_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_724_cast_fp16, y = var_730_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_741_to_fp16 = const()[name = tensor("op_741_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_741_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47672192)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47674816)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_759_pad_type_0 = const()[name = tensor("op_759_pad_type_0"), val = tensor("valid")]; + tensor var_759_strides_0 = const()[name = tensor("op_759_strides_0"), val = tensor([1, 1])]; + tensor var_759_pad_0 = const()[name = tensor("op_759_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_759_dilations_0 = const()[name = tensor("op_759_dilations_0"), val = tensor([1, 1])]; + tensor var_759_groups_0 = const()[name = tensor("op_759_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47677440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50954304))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50954432)))]; + tensor var_759_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_759_dilations_0, groups = var_759_groups_0, pad = var_759_pad_0, pad_type = var_759_pad_type_0, strides = var_759_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_765_pad_type_0 = const()[name = tensor("op_765_pad_type_0"), val = tensor("valid")]; + tensor var_765_strides_0 = const()[name = tensor("op_765_strides_0"), val = tensor([1, 1])]; + tensor var_765_pad_0 = const()[name = tensor("op_765_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_765_dilations_0 = const()[name = tensor("op_765_dilations_0"), val = tensor([1, 1])]; + tensor var_765_groups_0 = const()[name = tensor("op_765_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51003008))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50964736))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_765_cast_fp16 = conv(dilations = var_765_dilations_0, groups = var_765_groups_0, pad = var_765_pad_0, pad_type = var_765_pad_type_0, strides = var_765_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = var_759_cast_fp16, y = var_765_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_776_pad_type_0 = const()[name = tensor("op_776_pad_type_0"), val = tensor("valid")]; + tensor var_776_strides_0 = const()[name = tensor("op_776_strides_0"), val = tensor([1, 1])]; + tensor var_776_pad_0 = const()[name = tensor("op_776_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_776_dilations_0 = const()[name = tensor("op_776_dilations_0"), val = tensor([1, 1])]; + tensor var_776_groups_0 = const()[name = tensor("op_776_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51822272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55099136))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55099264)))]; + tensor var_776_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_776_dilations_0, groups = var_776_groups_0, pad = var_776_pad_0, pad_type = var_776_pad_type_0, strides = var_776_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_782_pad_type_0 = const()[name = tensor("op_782_pad_type_0"), val = tensor("valid")]; + tensor var_782_strides_0 = const()[name = tensor("op_782_strides_0"), val = tensor([1, 1])]; + tensor var_782_pad_0 = const()[name = tensor("op_782_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_782_dilations_0 = const()[name = tensor("op_782_dilations_0"), val = tensor([1, 1])]; + tensor var_782_groups_0 = const()[name = tensor("op_782_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55315392))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55101888))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_782_cast_fp16 = conv(dilations = var_782_dilations_0, groups = var_782_groups_0, pad = var_782_pad_0, pad_type = var_782_pad_type_0, strides = var_782_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_776_cast_fp16, y = var_782_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_792 = const()[name = tensor("op_792"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_811_to_fp16 = const()[name = tensor("op_811_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_811_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56134656)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56137280)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_833_pad_type_0 = const()[name = tensor("op_833_pad_type_0"), val = tensor("valid")]; + tensor var_833_strides_0 = const()[name = tensor("op_833_strides_0"), val = tensor([1, 1])]; + tensor var_833_pad_0 = const()[name = tensor("op_833_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_833_dilations_0 = const()[name = tensor("op_833_dilations_0"), val = tensor([1, 1])]; + tensor var_833_groups_0 = const()[name = tensor("op_833_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56139904))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56959168))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56959296)))]; + tensor var_833_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_833_dilations_0, groups = var_833_groups_0, pad = var_833_pad_0, pad_type = var_833_pad_type_0, strides = var_833_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_833_cast_fp16")]; + tensor var_839_pad_type_0 = const()[name = tensor("op_839_pad_type_0"), val = tensor("valid")]; + tensor var_839_strides_0 = const()[name = tensor("op_839_strides_0"), val = tensor([1, 1])]; + tensor var_839_pad_0 = const()[name = tensor("op_839_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_839_dilations_0 = const()[name = tensor("op_839_dilations_0"), val = tensor([1, 1])]; + tensor var_839_groups_0 = const()[name = tensor("op_839_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57013184))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56961920))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_839_cast_fp16 = conv(dilations = var_839_dilations_0, groups = var_839_groups_0, pad = var_839_pad_0, pad_type = var_839_pad_type_0, strides = var_839_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_839_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_833_cast_fp16, y = var_839_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_848_pad_type_0 = const()[name = tensor("op_848_pad_type_0"), val = tensor("valid")]; + tensor var_848_strides_0 = const()[name = tensor("op_848_strides_0"), val = tensor([1, 1])]; + tensor var_848_pad_0 = const()[name = tensor("op_848_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_848_dilations_0 = const()[name = tensor("op_848_dilations_0"), val = tensor([1, 1])]; + tensor var_848_groups_0 = const()[name = tensor("op_848_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57218048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58037312))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_848_cast_fp16 = conv(dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_848_cast_fp16")]; + tensor var_854_pad_type_0 = const()[name = tensor("op_854_pad_type_0"), val = tensor("valid")]; + tensor var_854_strides_0 = const()[name = tensor("op_854_strides_0"), val = tensor([1, 1])]; + tensor var_854_pad_0 = const()[name = tensor("op_854_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_854_dilations_0 = const()[name = tensor("op_854_dilations_0"), val = tensor([1, 1])]; + tensor var_854_groups_0 = const()[name = tensor("op_854_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58077888))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58037440))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_854_cast_fp16 = conv(dilations = var_854_dilations_0, groups = var_854_groups_0, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_854_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_854_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_848_cast_fp16, y = var_854_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_864_pad_type_0 = const()[name = tensor("op_864_pad_type_0"), val = tensor("valid")]; + tensor var_864_strides_0 = const()[name = tensor("op_864_strides_0"), val = tensor([1, 1])]; + tensor var_864_pad_0 = const()[name = tensor("op_864_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_864_dilations_0 = const()[name = tensor("op_864_dilations_0"), val = tensor([1, 1])]; + tensor var_864_groups_0 = const()[name = tensor("op_864_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58282752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59102016))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59102144)))]; + tensor var_864_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_864_dilations_0, groups = var_864_groups_0, pad = var_864_pad_0, pad_type = var_864_pad_type_0, strides = var_864_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_864_cast_fp16")]; + tensor var_870_pad_type_0 = const()[name = tensor("op_870_pad_type_0"), val = tensor("valid")]; + tensor var_870_strides_0 = const()[name = tensor("op_870_strides_0"), val = tensor([1, 1])]; + tensor var_870_pad_0 = const()[name = tensor("op_870_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_870_dilations_0 = const()[name = tensor("op_870_dilations_0"), val = tensor([1, 1])]; + tensor var_870_groups_0 = const()[name = tensor("op_870_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59138432))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59104768))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_870_cast_fp16 = conv(dilations = var_870_dilations_0, groups = var_870_groups_0, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_870_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_870_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_864_cast_fp16, y = var_870_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_873 = const()[name = tensor("op_873"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_873, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_875_to_fp16 = const()[name = tensor("op_875_to_fp16"), val = tensor(0x1p-3)]; + tensor var_876_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_875_to_fp16)[name = tensor("op_876_cast_fp16")]; + tensor var_877 = const()[name = tensor("op_877"), val = tensor([1, 20, 64, -1])]; + tensor var_878_cast_fp16 = reshape(shape = var_877, x = key_7_cast_fp16)[name = tensor("op_878_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_876_cast_fp16, y = var_878_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor var_881_cast_fp16 = softmax(axis = var_792, x = mh_w_7_cast_fp16)[name = tensor("op_881_cast_fp16")]; + tensor var_882 = const()[name = tensor("op_882"), val = tensor([1, 20, 64, -1])]; + tensor var_883_cast_fp16 = reshape(shape = var_882, x = value_7_cast_fp16)[name = tensor("op_883_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_883_cast_fp16, y = var_881_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_886 = const()[name = tensor("op_886"), val = tensor([1, 1280, 1, -1])]; + tensor input_25_cast_fp16 = reshape(shape = var_886, x = attn_7_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_896_pad_type_0 = const()[name = tensor("op_896_pad_type_0"), val = tensor("valid")]; + tensor var_896_strides_0 = const()[name = tensor("op_896_strides_0"), val = tensor([1, 1])]; + tensor var_896_pad_0 = const()[name = tensor("op_896_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_896_dilations_0 = const()[name = tensor("op_896_dilations_0"), val = tensor([1, 1])]; + tensor var_896_groups_0 = const()[name = tensor("op_896_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59343296))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60162560))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60162688)))]; + tensor var_896_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_896_dilations_0, groups = var_896_groups_0, pad = var_896_pad_0, pad_type = var_896_pad_type_0, strides = var_896_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("valid")]; + tensor var_902_strides_0 = const()[name = tensor("op_902_strides_0"), val = tensor([1, 1])]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_dilations_0 = const()[name = tensor("op_902_dilations_0"), val = tensor([1, 1])]; + tensor var_902_groups_0 = const()[name = tensor("op_902_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60188096))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60165312))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_902_cast_fp16 = conv(dilations = var_902_dilations_0, groups = var_902_groups_0, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_902_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor obj_15_cast_fp16 = add(x = var_896_cast_fp16, y = var_902_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_913_to_fp16 = const()[name = tensor("op_913_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_913_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60392960)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60395584)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_931_pad_type_0 = const()[name = tensor("op_931_pad_type_0"), val = tensor("valid")]; + tensor var_931_strides_0 = const()[name = tensor("op_931_strides_0"), val = tensor([1, 1])]; + tensor var_931_pad_0 = const()[name = tensor("op_931_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_931_dilations_0 = const()[name = tensor("op_931_dilations_0"), val = tensor([1, 1])]; + tensor var_931_groups_0 = const()[name = tensor("op_931_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60398208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63675072))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63675200)))]; + tensor var_931_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_931_dilations_0, groups = var_931_groups_0, pad = var_931_pad_0, pad_type = var_931_pad_type_0, strides = var_931_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = tensor("op_931_cast_fp16")]; + tensor var_937_pad_type_0 = const()[name = tensor("op_937_pad_type_0"), val = tensor("valid")]; + tensor var_937_strides_0 = const()[name = tensor("op_937_strides_0"), val = tensor([1, 1])]; + tensor var_937_pad_0 = const()[name = tensor("op_937_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_937_dilations_0 = const()[name = tensor("op_937_dilations_0"), val = tensor([1, 1])]; + tensor var_937_groups_0 = const()[name = tensor("op_937_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63740864))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63685504))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_937_cast_fp16 = conv(dilations = var_937_dilations_0, groups = var_937_groups_0, pad = var_937_pad_0, pad_type = var_937_pad_type_0, strides = var_937_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = tensor("op_937_cast_fp16")]; + tensor input_29_cast_fp16 = add(x = var_931_cast_fp16, y = var_937_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_948_pad_type_0 = const()[name = tensor("op_948_pad_type_0"), val = tensor("valid")]; + tensor var_948_strides_0 = const()[name = tensor("op_948_strides_0"), val = tensor([1, 1])]; + tensor var_948_pad_0 = const()[name = tensor("op_948_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_948_dilations_0 = const()[name = tensor("op_948_dilations_0"), val = tensor([1, 1])]; + tensor var_948_groups_0 = const()[name = tensor("op_948_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64560128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67836992))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67837120)))]; + tensor var_948_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_948_dilations_0, groups = var_948_groups_0, pad = var_948_pad_0, pad_type = var_948_pad_type_0, strides = var_948_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_954_pad_type_0 = const()[name = tensor("op_954_pad_type_0"), val = tensor("valid")]; + tensor var_954_strides_0 = const()[name = tensor("op_954_strides_0"), val = tensor([1, 1])]; + tensor var_954_pad_0 = const()[name = tensor("op_954_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_954_dilations_0 = const()[name = tensor("op_954_dilations_0"), val = tensor([1, 1])]; + tensor var_954_groups_0 = const()[name = tensor("op_954_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68064960))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67839744))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_954_cast_fp16 = conv(dilations = var_954_dilations_0, groups = var_954_groups_0, pad = var_954_pad_0, pad_type = var_954_pad_type_0, strides = var_954_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_954_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_948_cast_fp16, y = var_954_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_964 = const()[name = tensor("op_964"), val = tensor(3)]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_983_to_fp16 = const()[name = tensor("op_983_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_983_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68884224)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68886848)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_1005_pad_type_0 = const()[name = tensor("op_1005_pad_type_0"), val = tensor("valid")]; + tensor var_1005_strides_0 = const()[name = tensor("op_1005_strides_0"), val = tensor([1, 1])]; + tensor var_1005_pad_0 = const()[name = tensor("op_1005_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1005_dilations_0 = const()[name = tensor("op_1005_dilations_0"), val = tensor([1, 1])]; + tensor var_1005_groups_0 = const()[name = tensor("op_1005_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68889472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69708736))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69708864)))]; + tensor var_1005_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1005_dilations_0, groups = var_1005_groups_0, pad = var_1005_pad_0, pad_type = var_1005_pad_type_0, strides = var_1005_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_1005_cast_fp16")]; + tensor var_1011_pad_type_0 = const()[name = tensor("op_1011_pad_type_0"), val = tensor("valid")]; + tensor var_1011_strides_0 = const()[name = tensor("op_1011_strides_0"), val = tensor([1, 1])]; + tensor var_1011_pad_0 = const()[name = tensor("op_1011_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1011_dilations_0 = const()[name = tensor("op_1011_dilations_0"), val = tensor([1, 1])]; + tensor var_1011_groups_0 = const()[name = tensor("op_1011_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69759808))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69711488))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1011_cast_fp16 = conv(dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_1011_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_1005_cast_fp16, y = var_1011_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_1020_pad_type_0 = const()[name = tensor("op_1020_pad_type_0"), val = tensor("valid")]; + tensor var_1020_strides_0 = const()[name = tensor("op_1020_strides_0"), val = tensor([1, 1])]; + tensor var_1020_pad_0 = const()[name = tensor("op_1020_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1020_dilations_0 = const()[name = tensor("op_1020_dilations_0"), val = tensor([1, 1])]; + tensor var_1020_groups_0 = const()[name = tensor("op_1020_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69964672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70783936))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1020_cast_fp16 = conv(dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_1020_cast_fp16")]; + tensor var_1026_pad_type_0 = const()[name = tensor("op_1026_pad_type_0"), val = tensor("valid")]; + tensor var_1026_strides_0 = const()[name = tensor("op_1026_strides_0"), val = tensor([1, 1])]; + tensor var_1026_pad_0 = const()[name = tensor("op_1026_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1026_dilations_0 = const()[name = tensor("op_1026_dilations_0"), val = tensor([1, 1])]; + tensor var_1026_groups_0 = const()[name = tensor("op_1026_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70823680))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70784064))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_1026_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1026_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_1036_pad_type_0 = const()[name = tensor("op_1036_pad_type_0"), val = tensor("valid")]; + tensor var_1036_strides_0 = const()[name = tensor("op_1036_strides_0"), val = tensor([1, 1])]; + tensor var_1036_pad_0 = const()[name = tensor("op_1036_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1036_dilations_0 = const()[name = tensor("op_1036_dilations_0"), val = tensor([1, 1])]; + tensor var_1036_groups_0 = const()[name = tensor("op_1036_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71028544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71847808))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71847936)))]; + tensor var_1036_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1036_dilations_0, groups = var_1036_groups_0, pad = var_1036_pad_0, pad_type = var_1036_pad_type_0, strides = var_1036_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_1036_cast_fp16")]; + tensor var_1042_pad_type_0 = const()[name = tensor("op_1042_pad_type_0"), val = tensor("valid")]; + tensor var_1042_strides_0 = const()[name = tensor("op_1042_strides_0"), val = tensor([1, 1])]; + tensor var_1042_pad_0 = const()[name = tensor("op_1042_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1042_dilations_0 = const()[name = tensor("op_1042_dilations_0"), val = tensor([1, 1])]; + tensor var_1042_groups_0 = const()[name = tensor("op_1042_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71883776))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71850560))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1042_cast_fp16 = conv(dilations = var_1042_dilations_0, groups = var_1042_groups_0, pad = var_1042_pad_0, pad_type = var_1042_pad_type_0, strides = var_1042_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_1036_cast_fp16, y = var_1042_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_1045, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_1047_to_fp16 = const()[name = tensor("op_1047_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1048_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1047_to_fp16)[name = tensor("op_1048_cast_fp16")]; + tensor var_1049 = const()[name = tensor("op_1049"), val = tensor([1, 20, 64, -1])]; + tensor var_1050_cast_fp16 = reshape(shape = var_1049, x = key_9_cast_fp16)[name = tensor("op_1050_cast_fp16")]; + tensor mh_w_9_transpose_x_0 = const()[name = tensor("mh_w_9_transpose_x_0"), val = tensor(true)]; + tensor mh_w_9_transpose_y_0 = const()[name = tensor("mh_w_9_transpose_y_0"), val = tensor(false)]; + tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1048_cast_fp16, y = var_1050_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_1053_cast_fp16 = softmax(axis = var_964, x = mh_w_9_cast_fp16)[name = tensor("op_1053_cast_fp16")]; + tensor var_1054 = const()[name = tensor("op_1054"), val = tensor([1, 20, 64, -1])]; + tensor var_1055_cast_fp16 = reshape(shape = var_1054, x = value_9_cast_fp16)[name = tensor("op_1055_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1055_cast_fp16, y = var_1053_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_1058 = const()[name = tensor("op_1058"), val = tensor([1, 1280, 1, -1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1058, x = attn_9_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1068_pad_type_0 = const()[name = tensor("op_1068_pad_type_0"), val = tensor("valid")]; + tensor var_1068_strides_0 = const()[name = tensor("op_1068_strides_0"), val = tensor([1, 1])]; + tensor var_1068_pad_0 = const()[name = tensor("op_1068_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1068_dilations_0 = const()[name = tensor("op_1068_dilations_0"), val = tensor([1, 1])]; + tensor var_1068_groups_0 = const()[name = tensor("op_1068_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72088640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72907904))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72908032)))]; + tensor var_1068_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1068_dilations_0, groups = var_1068_groups_0, pad = var_1068_pad_0, pad_type = var_1068_pad_type_0, strides = var_1068_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1068_cast_fp16")]; + tensor var_1074_pad_type_0 = const()[name = tensor("op_1074_pad_type_0"), val = tensor("valid")]; + tensor var_1074_strides_0 = const()[name = tensor("op_1074_strides_0"), val = tensor([1, 1])]; + tensor var_1074_pad_0 = const()[name = tensor("op_1074_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1074_dilations_0 = const()[name = tensor("op_1074_dilations_0"), val = tensor([1, 1])]; + tensor var_1074_groups_0 = const()[name = tensor("op_1074_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72933952))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72910656))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1074_cast_fp16 = conv(dilations = var_1074_dilations_0, groups = var_1074_groups_0, pad = var_1074_pad_0, pad_type = var_1074_pad_type_0, strides = var_1074_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1074_cast_fp16")]; + tensor obj_19_cast_fp16 = add(x = var_1068_cast_fp16, y = var_1074_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1085_to_fp16 = const()[name = tensor("op_1085_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1085_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73138816)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73141440)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1103_pad_type_0 = const()[name = tensor("op_1103_pad_type_0"), val = tensor("valid")]; + tensor var_1103_strides_0 = const()[name = tensor("op_1103_strides_0"), val = tensor([1, 1])]; + tensor var_1103_pad_0 = const()[name = tensor("op_1103_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1103_dilations_0 = const()[name = tensor("op_1103_dilations_0"), val = tensor([1, 1])]; + tensor var_1103_groups_0 = const()[name = tensor("op_1103_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73144064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76420928))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76421056)))]; + tensor var_1103_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1103_dilations_0, groups = var_1103_groups_0, pad = var_1103_pad_0, pad_type = var_1103_pad_type_0, strides = var_1103_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1103_cast_fp16")]; + tensor var_1109_pad_type_0 = const()[name = tensor("op_1109_pad_type_0"), val = tensor("valid")]; + tensor var_1109_strides_0 = const()[name = tensor("op_1109_strides_0"), val = tensor([1, 1])]; + tensor var_1109_pad_0 = const()[name = tensor("op_1109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1109_dilations_0 = const()[name = tensor("op_1109_dilations_0"), val = tensor([1, 1])]; + tensor var_1109_groups_0 = const()[name = tensor("op_1109_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76440192))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76431360))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1109_cast_fp16 = conv(dilations = var_1109_dilations_0, groups = var_1109_groups_0, pad = var_1109_pad_0, pad_type = var_1109_pad_type_0, strides = var_1109_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1109_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1109_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1120_pad_type_0 = const()[name = tensor("op_1120_pad_type_0"), val = tensor("valid")]; + tensor var_1120_strides_0 = const()[name = tensor("op_1120_strides_0"), val = tensor([1, 1])]; + tensor var_1120_pad_0 = const()[name = tensor("op_1120_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1120_dilations_0 = const()[name = tensor("op_1120_dilations_0"), val = tensor([1, 1])]; + tensor var_1120_groups_0 = const()[name = tensor("op_1120_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77259456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80536320))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80536448)))]; + tensor var_1120_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1120_dilations_0, groups = var_1120_groups_0, pad = var_1120_pad_0, pad_type = var_1120_pad_type_0, strides = var_1120_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_1120_cast_fp16")]; + tensor var_1126_pad_type_0 = const()[name = tensor("op_1126_pad_type_0"), val = tensor("valid")]; + tensor var_1126_strides_0 = const()[name = tensor("op_1126_strides_0"), val = tensor([1, 1])]; + tensor var_1126_pad_0 = const()[name = tensor("op_1126_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1126_dilations_0 = const()[name = tensor("op_1126_dilations_0"), val = tensor([1, 1])]; + tensor var_1126_groups_0 = const()[name = tensor("op_1126_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80778304))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80539072))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1126_cast_fp16 = conv(dilations = var_1126_dilations_0, groups = var_1126_groups_0, pad = var_1126_pad_0, pad_type = var_1126_pad_type_0, strides = var_1126_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1126_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_1136 = const()[name = tensor("op_1136"), val = tensor(3)]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1155_to_fp16 = const()[name = tensor("op_1155_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1155_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81597568)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81600192)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_1177_pad_type_0 = const()[name = tensor("op_1177_pad_type_0"), val = tensor("valid")]; + tensor var_1177_strides_0 = const()[name = tensor("op_1177_strides_0"), val = tensor([1, 1])]; + tensor var_1177_pad_0 = const()[name = tensor("op_1177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1177_dilations_0 = const()[name = tensor("op_1177_dilations_0"), val = tensor([1, 1])]; + tensor var_1177_groups_0 = const()[name = tensor("op_1177_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81602816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82422080))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82422208)))]; + tensor var_1177_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1177_dilations_0, groups = var_1177_groups_0, pad = var_1177_pad_0, pad_type = var_1177_pad_type_0, strides = var_1177_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1177_cast_fp16")]; + tensor var_1183_pad_type_0 = const()[name = tensor("op_1183_pad_type_0"), val = tensor("valid")]; + tensor var_1183_strides_0 = const()[name = tensor("op_1183_strides_0"), val = tensor([1, 1])]; + tensor var_1183_pad_0 = const()[name = tensor("op_1183_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1183_dilations_0 = const()[name = tensor("op_1183_dilations_0"), val = tensor([1, 1])]; + tensor var_1183_groups_0 = const()[name = tensor("op_1183_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82473856))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82424832))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1183_cast_fp16 = conv(dilations = var_1183_dilations_0, groups = var_1183_groups_0, pad = var_1183_pad_0, pad_type = var_1183_pad_type_0, strides = var_1183_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1183_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_1177_cast_fp16, y = var_1183_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_1192_pad_type_0 = const()[name = tensor("op_1192_pad_type_0"), val = tensor("valid")]; + tensor var_1192_strides_0 = const()[name = tensor("op_1192_strides_0"), val = tensor([1, 1])]; + tensor var_1192_pad_0 = const()[name = tensor("op_1192_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1192_dilations_0 = const()[name = tensor("op_1192_dilations_0"), val = tensor([1, 1])]; + tensor var_1192_groups_0 = const()[name = tensor("op_1192_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82678720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83497984))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1192_cast_fp16 = conv(dilations = var_1192_dilations_0, groups = var_1192_groups_0, pad = var_1192_pad_0, pad_type = var_1192_pad_type_0, strides = var_1192_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1192_cast_fp16")]; + tensor var_1198_pad_type_0 = const()[name = tensor("op_1198_pad_type_0"), val = tensor("valid")]; + tensor var_1198_strides_0 = const()[name = tensor("op_1198_strides_0"), val = tensor([1, 1])]; + tensor var_1198_pad_0 = const()[name = tensor("op_1198_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1198_dilations_0 = const()[name = tensor("op_1198_dilations_0"), val = tensor([1, 1])]; + tensor var_1198_groups_0 = const()[name = tensor("op_1198_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83533248))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83498112))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1198_cast_fp16 = conv(dilations = var_1198_dilations_0, groups = var_1198_groups_0, pad = var_1198_pad_0, pad_type = var_1198_pad_type_0, strides = var_1198_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_1192_cast_fp16, y = var_1198_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_1208_pad_type_0 = const()[name = tensor("op_1208_pad_type_0"), val = tensor("valid")]; + tensor var_1208_strides_0 = const()[name = tensor("op_1208_strides_0"), val = tensor([1, 1])]; + tensor var_1208_pad_0 = const()[name = tensor("op_1208_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1208_dilations_0 = const()[name = tensor("op_1208_dilations_0"), val = tensor([1, 1])]; + tensor var_1208_groups_0 = const()[name = tensor("op_1208_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83738112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84557376))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84557504)))]; + tensor var_1208_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1208_dilations_0, groups = var_1208_groups_0, pad = var_1208_pad_0, pad_type = var_1208_pad_type_0, strides = var_1208_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_1208_cast_fp16")]; + tensor var_1214_pad_type_0 = const()[name = tensor("op_1214_pad_type_0"), val = tensor("valid")]; + tensor var_1214_strides_0 = const()[name = tensor("op_1214_strides_0"), val = tensor([1, 1])]; + tensor var_1214_pad_0 = const()[name = tensor("op_1214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1214_dilations_0 = const()[name = tensor("op_1214_dilations_0"), val = tensor([1, 1])]; + tensor var_1214_groups_0 = const()[name = tensor("op_1214_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84586624))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84560128))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1214_cast_fp16 = conv(dilations = var_1214_dilations_0, groups = var_1214_groups_0, pad = var_1214_pad_0, pad_type = var_1214_pad_type_0, strides = var_1214_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_1208_cast_fp16, y = var_1214_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_1217 = const()[name = tensor("op_1217"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_1217, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_1219_to_fp16 = const()[name = tensor("op_1219_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1220_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1219_to_fp16)[name = tensor("op_1220_cast_fp16")]; + tensor var_1221 = const()[name = tensor("op_1221"), val = tensor([1, 20, 64, -1])]; + tensor var_1222_cast_fp16 = reshape(shape = var_1221, x = key_11_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1220_cast_fp16, y = var_1222_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor var_1225_cast_fp16 = softmax(axis = var_1136, x = mh_w_11_cast_fp16)[name = tensor("op_1225_cast_fp16")]; + tensor var_1226 = const()[name = tensor("op_1226"), val = tensor([1, 20, 64, -1])]; + tensor var_1227_cast_fp16 = reshape(shape = var_1226, x = value_11_cast_fp16)[name = tensor("op_1227_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1227_cast_fp16, y = var_1225_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_1230 = const()[name = tensor("op_1230"), val = tensor([1, 1280, 1, -1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1230, x = attn_11_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1240_pad_type_0 = const()[name = tensor("op_1240_pad_type_0"), val = tensor("valid")]; + tensor var_1240_strides_0 = const()[name = tensor("op_1240_strides_0"), val = tensor([1, 1])]; + tensor var_1240_pad_0 = const()[name = tensor("op_1240_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1240_dilations_0 = const()[name = tensor("op_1240_dilations_0"), val = tensor([1, 1])]; + tensor var_1240_groups_0 = const()[name = tensor("op_1240_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84791488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85610752))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85610880)))]; + tensor var_1240_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1240_dilations_0, groups = var_1240_groups_0, pad = var_1240_pad_0, pad_type = var_1240_pad_type_0, strides = var_1240_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_1240_cast_fp16")]; + tensor var_1246_pad_type_0 = const()[name = tensor("op_1246_pad_type_0"), val = tensor("valid")]; + tensor var_1246_strides_0 = const()[name = tensor("op_1246_strides_0"), val = tensor([1, 1])]; + tensor var_1246_pad_0 = const()[name = tensor("op_1246_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1246_dilations_0 = const()[name = tensor("op_1246_dilations_0"), val = tensor([1, 1])]; + tensor var_1246_groups_0 = const()[name = tensor("op_1246_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85634944))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85613504))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1246_cast_fp16 = conv(dilations = var_1246_dilations_0, groups = var_1246_groups_0, pad = var_1246_pad_0, pad_type = var_1246_pad_type_0, strides = var_1246_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor obj_23_cast_fp16 = add(x = var_1240_cast_fp16, y = var_1246_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1257_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85839808)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85842432)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1275_pad_type_0 = const()[name = tensor("op_1275_pad_type_0"), val = tensor("valid")]; + tensor var_1275_strides_0 = const()[name = tensor("op_1275_strides_0"), val = tensor([1, 1])]; + tensor var_1275_pad_0 = const()[name = tensor("op_1275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1275_dilations_0 = const()[name = tensor("op_1275_dilations_0"), val = tensor([1, 1])]; + tensor var_1275_groups_0 = const()[name = tensor("op_1275_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85845056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89121920))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89122048)))]; + tensor var_1275_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1275_dilations_0, groups = var_1275_groups_0, pad = var_1275_pad_0, pad_type = var_1275_pad_type_0, strides = var_1275_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_1275_cast_fp16")]; + tensor var_1281_pad_type_0 = const()[name = tensor("op_1281_pad_type_0"), val = tensor("valid")]; + tensor var_1281_strides_0 = const()[name = tensor("op_1281_strides_0"), val = tensor([1, 1])]; + tensor var_1281_pad_0 = const()[name = tensor("op_1281_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1281_dilations_0 = const()[name = tensor("op_1281_dilations_0"), val = tensor([1, 1])]; + tensor var_1281_groups_0 = const()[name = tensor("op_1281_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89166464))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89132352))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1281_cast_fp16 = conv(dilations = var_1281_dilations_0, groups = var_1281_groups_0, pad = var_1281_pad_0, pad_type = var_1281_pad_type_0, strides = var_1281_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_1281_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = var_1275_cast_fp16, y = var_1281_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_1292_pad_type_0 = const()[name = tensor("op_1292_pad_type_0"), val = tensor("valid")]; + tensor var_1292_strides_0 = const()[name = tensor("op_1292_strides_0"), val = tensor([1, 1])]; + tensor var_1292_pad_0 = const()[name = tensor("op_1292_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1292_dilations_0 = const()[name = tensor("op_1292_dilations_0"), val = tensor([1, 1])]; + tensor var_1292_groups_0 = const()[name = tensor("op_1292_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89985728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93262592))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93262720)))]; + tensor var_1292_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1292_dilations_0, groups = var_1292_groups_0, pad = var_1292_pad_0, pad_type = var_1292_pad_type_0, strides = var_1292_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1298_pad_type_0 = const()[name = tensor("op_1298_pad_type_0"), val = tensor("valid")]; + tensor var_1298_strides_0 = const()[name = tensor("op_1298_strides_0"), val = tensor([1, 1])]; + tensor var_1298_pad_0 = const()[name = tensor("op_1298_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1298_dilations_0 = const()[name = tensor("op_1298_dilations_0"), val = tensor([1, 1])]; + tensor var_1298_groups_0 = const()[name = tensor("op_1298_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93508032))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93265344))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1298_cast_fp16 = conv(dilations = var_1298_dilations_0, groups = var_1298_groups_0, pad = var_1298_pad_0, pad_type = var_1298_pad_type_0, strides = var_1298_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_1292_cast_fp16, y = var_1298_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1308 = const()[name = tensor("op_1308"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1327_to_fp16 = const()[name = tensor("op_1327_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1327_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94327296)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94329920)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor var_1349_pad_type_0 = const()[name = tensor("op_1349_pad_type_0"), val = tensor("valid")]; + tensor var_1349_strides_0 = const()[name = tensor("op_1349_strides_0"), val = tensor([1, 1])]; + tensor var_1349_pad_0 = const()[name = tensor("op_1349_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1349_dilations_0 = const()[name = tensor("op_1349_dilations_0"), val = tensor([1, 1])]; + tensor var_1349_groups_0 = const()[name = tensor("op_1349_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94332544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95151808))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95151936)))]; + tensor var_1349_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1349_dilations_0, groups = var_1349_groups_0, pad = var_1349_pad_0, pad_type = var_1349_pad_type_0, strides = var_1349_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1355_pad_type_0 = const()[name = tensor("op_1355_pad_type_0"), val = tensor("valid")]; + tensor var_1355_strides_0 = const()[name = tensor("op_1355_strides_0"), val = tensor([1, 1])]; + tensor var_1355_pad_0 = const()[name = tensor("op_1355_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1355_dilations_0 = const()[name = tensor("op_1355_dilations_0"), val = tensor([1, 1])]; + tensor var_1355_groups_0 = const()[name = tensor("op_1355_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95204224))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95154560))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1355_cast_fp16 = conv(dilations = var_1355_dilations_0, groups = var_1355_groups_0, pad = var_1355_pad_0, pad_type = var_1355_pad_type_0, strides = var_1355_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1355_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1349_cast_fp16, y = var_1355_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1364_pad_type_0 = const()[name = tensor("op_1364_pad_type_0"), val = tensor("valid")]; + tensor var_1364_strides_0 = const()[name = tensor("op_1364_strides_0"), val = tensor([1, 1])]; + tensor var_1364_pad_0 = const()[name = tensor("op_1364_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1364_dilations_0 = const()[name = tensor("op_1364_dilations_0"), val = tensor([1, 1])]; + tensor var_1364_groups_0 = const()[name = tensor("op_1364_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95409088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96228352))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1364_cast_fp16 = conv(dilations = var_1364_dilations_0, groups = var_1364_groups_0, pad = var_1364_pad_0, pad_type = var_1364_pad_type_0, strides = var_1364_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1370_pad_type_0 = const()[name = tensor("op_1370_pad_type_0"), val = tensor("valid")]; + tensor var_1370_strides_0 = const()[name = tensor("op_1370_strides_0"), val = tensor([1, 1])]; + tensor var_1370_pad_0 = const()[name = tensor("op_1370_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1370_dilations_0 = const()[name = tensor("op_1370_dilations_0"), val = tensor([1, 1])]; + tensor var_1370_groups_0 = const()[name = tensor("op_1370_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96263168))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96228480))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1370_cast_fp16 = conv(dilations = var_1370_dilations_0, groups = var_1370_groups_0, pad = var_1370_pad_0, pad_type = var_1370_pad_type_0, strides = var_1370_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1370_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1370_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1380_pad_type_0 = const()[name = tensor("op_1380_pad_type_0"), val = tensor("valid")]; + tensor var_1380_strides_0 = const()[name = tensor("op_1380_strides_0"), val = tensor([1, 1])]; + tensor var_1380_pad_0 = const()[name = tensor("op_1380_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1380_dilations_0 = const()[name = tensor("op_1380_dilations_0"), val = tensor([1, 1])]; + tensor var_1380_groups_0 = const()[name = tensor("op_1380_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96468032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97287296))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97287424)))]; + tensor var_1380_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1380_dilations_0, groups = var_1380_groups_0, pad = var_1380_pad_0, pad_type = var_1380_pad_type_0, strides = var_1380_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_1380_cast_fp16")]; + tensor var_1386_pad_type_0 = const()[name = tensor("op_1386_pad_type_0"), val = tensor("valid")]; + tensor var_1386_strides_0 = const()[name = tensor("op_1386_strides_0"), val = tensor([1, 1])]; + tensor var_1386_pad_0 = const()[name = tensor("op_1386_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1386_dilations_0 = const()[name = tensor("op_1386_dilations_0"), val = tensor([1, 1])]; + tensor var_1386_groups_0 = const()[name = tensor("op_1386_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97313920))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97290048))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1386_cast_fp16 = conv(dilations = var_1386_dilations_0, groups = var_1386_groups_0, pad = var_1386_pad_0, pad_type = var_1386_pad_type_0, strides = var_1386_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1380_cast_fp16, y = var_1386_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1389 = const()[name = tensor("op_1389"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1389, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1391_to_fp16 = const()[name = tensor("op_1391_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1392_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1391_to_fp16)[name = tensor("op_1392_cast_fp16")]; + tensor var_1393 = const()[name = tensor("op_1393"), val = tensor([1, 20, 64, -1])]; + tensor var_1394_cast_fp16 = reshape(shape = var_1393, x = key_13_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1392_cast_fp16, y = var_1394_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor var_1397_cast_fp16 = softmax(axis = var_1308, x = mh_w_13_cast_fp16)[name = tensor("op_1397_cast_fp16")]; + tensor var_1398 = const()[name = tensor("op_1398"), val = tensor([1, 20, 64, -1])]; + tensor var_1399_cast_fp16 = reshape(shape = var_1398, x = value_13_cast_fp16)[name = tensor("op_1399_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1399_cast_fp16, y = var_1397_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1402 = const()[name = tensor("op_1402"), val = tensor([1, 1280, 1, -1])]; + tensor input_49_cast_fp16 = reshape(shape = var_1402, x = attn_13_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1412_pad_type_0 = const()[name = tensor("op_1412_pad_type_0"), val = tensor("valid")]; + tensor var_1412_strides_0 = const()[name = tensor("op_1412_strides_0"), val = tensor([1, 1])]; + tensor var_1412_pad_0 = const()[name = tensor("op_1412_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1412_dilations_0 = const()[name = tensor("op_1412_dilations_0"), val = tensor([1, 1])]; + tensor var_1412_groups_0 = const()[name = tensor("op_1412_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97518784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98338048))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98338176)))]; + tensor var_1412_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1412_dilations_0, groups = var_1412_groups_0, pad = var_1412_pad_0, pad_type = var_1412_pad_type_0, strides = var_1412_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_1412_cast_fp16")]; + tensor var_1418_pad_type_0 = const()[name = tensor("op_1418_pad_type_0"), val = tensor("valid")]; + tensor var_1418_strides_0 = const()[name = tensor("op_1418_strides_0"), val = tensor([1, 1])]; + tensor var_1418_pad_0 = const()[name = tensor("op_1418_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1418_dilations_0 = const()[name = tensor("op_1418_dilations_0"), val = tensor([1, 1])]; + tensor var_1418_groups_0 = const()[name = tensor("op_1418_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98362688))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98340800))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1418_cast_fp16 = conv(dilations = var_1418_dilations_0, groups = var_1418_groups_0, pad = var_1418_pad_0, pad_type = var_1418_pad_type_0, strides = var_1418_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor obj_27_cast_fp16 = add(x = var_1412_cast_fp16, y = var_1418_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1429_to_fp16 = const()[name = tensor("op_1429_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1429_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98567552)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98570176)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1447_pad_type_0 = const()[name = tensor("op_1447_pad_type_0"), val = tensor("valid")]; + tensor var_1447_strides_0 = const()[name = tensor("op_1447_strides_0"), val = tensor([1, 1])]; + tensor var_1447_pad_0 = const()[name = tensor("op_1447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1447_dilations_0 = const()[name = tensor("op_1447_dilations_0"), val = tensor([1, 1])]; + tensor var_1447_groups_0 = const()[name = tensor("op_1447_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98572800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101849664))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101849792)))]; + tensor var_1447_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1447_dilations_0, groups = var_1447_groups_0, pad = var_1447_pad_0, pad_type = var_1447_pad_type_0, strides = var_1447_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_1447_cast_fp16")]; + tensor var_1453_pad_type_0 = const()[name = tensor("op_1453_pad_type_0"), val = tensor("valid")]; + tensor var_1453_strides_0 = const()[name = tensor("op_1453_strides_0"), val = tensor([1, 1])]; + tensor var_1453_pad_0 = const()[name = tensor("op_1453_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1453_dilations_0 = const()[name = tensor("op_1453_dilations_0"), val = tensor([1, 1])]; + tensor var_1453_groups_0 = const()[name = tensor("op_1453_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101881792))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101860096))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1453_cast_fp16 = conv(dilations = var_1453_dilations_0, groups = var_1453_groups_0, pad = var_1453_pad_0, pad_type = var_1453_pad_type_0, strides = var_1453_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_1453_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = var_1447_cast_fp16, y = var_1453_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1464_pad_type_0 = const()[name = tensor("op_1464_pad_type_0"), val = tensor("valid")]; + tensor var_1464_strides_0 = const()[name = tensor("op_1464_strides_0"), val = tensor([1, 1])]; + tensor var_1464_pad_0 = const()[name = tensor("op_1464_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1464_dilations_0 = const()[name = tensor("op_1464_dilations_0"), val = tensor([1, 1])]; + tensor var_1464_groups_0 = const()[name = tensor("op_1464_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102701056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105977920))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105978048)))]; + tensor var_1464_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1464_dilations_0, groups = var_1464_groups_0, pad = var_1464_pad_0, pad_type = var_1464_pad_type_0, strides = var_1464_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + tensor var_1470_pad_type_0 = const()[name = tensor("op_1470_pad_type_0"), val = tensor("valid")]; + tensor var_1470_strides_0 = const()[name = tensor("op_1470_strides_0"), val = tensor([1, 1])]; + tensor var_1470_pad_0 = const()[name = tensor("op_1470_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1470_dilations_0 = const()[name = tensor("op_1470_dilations_0"), val = tensor([1, 1])]; + tensor var_1470_groups_0 = const()[name = tensor("op_1470_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106227584))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105980672))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1470_cast_fp16 = conv(dilations = var_1470_dilations_0, groups = var_1470_groups_0, pad = var_1470_pad_0, pad_type = var_1470_pad_type_0, strides = var_1470_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_1464_cast_fp16, y = var_1470_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_1480 = const()[name = tensor("op_1480"), val = tensor(3)]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1499_to_fp16 = const()[name = tensor("op_1499_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1499_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107046848)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107049472)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_1521_pad_type_0 = const()[name = tensor("op_1521_pad_type_0"), val = tensor("valid")]; + tensor var_1521_strides_0 = const()[name = tensor("op_1521_strides_0"), val = tensor([1, 1])]; + tensor var_1521_pad_0 = const()[name = tensor("op_1521_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1521_dilations_0 = const()[name = tensor("op_1521_dilations_0"), val = tensor([1, 1])]; + tensor var_1521_groups_0 = const()[name = tensor("op_1521_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107052096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107871360))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107871488)))]; + tensor var_1521_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1521_dilations_0, groups = var_1521_groups_0, pad = var_1521_pad_0, pad_type = var_1521_pad_type_0, strides = var_1521_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1521_cast_fp16")]; + tensor var_1527_pad_type_0 = const()[name = tensor("op_1527_pad_type_0"), val = tensor("valid")]; + tensor var_1527_strides_0 = const()[name = tensor("op_1527_strides_0"), val = tensor([1, 1])]; + tensor var_1527_pad_0 = const()[name = tensor("op_1527_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1527_dilations_0 = const()[name = tensor("op_1527_dilations_0"), val = tensor([1, 1])]; + tensor var_1527_groups_0 = const()[name = tensor("op_1527_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107923264))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107874112))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1527_cast_fp16 = conv(dilations = var_1527_dilations_0, groups = var_1527_groups_0, pad = var_1527_pad_0, pad_type = var_1527_pad_type_0, strides = var_1527_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1527_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1521_cast_fp16, y = var_1527_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1536_pad_type_0 = const()[name = tensor("op_1536_pad_type_0"), val = tensor("valid")]; + tensor var_1536_strides_0 = const()[name = tensor("op_1536_strides_0"), val = tensor([1, 1])]; + tensor var_1536_pad_0 = const()[name = tensor("op_1536_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1536_dilations_0 = const()[name = tensor("op_1536_dilations_0"), val = tensor([1, 1])]; + tensor var_1536_groups_0 = const()[name = tensor("op_1536_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108128128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108947392))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1536_cast_fp16 = conv(dilations = var_1536_dilations_0, groups = var_1536_groups_0, pad = var_1536_pad_0, pad_type = var_1536_pad_type_0, strides = var_1536_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1542_pad_type_0 = const()[name = tensor("op_1542_pad_type_0"), val = tensor("valid")]; + tensor var_1542_strides_0 = const()[name = tensor("op_1542_strides_0"), val = tensor([1, 1])]; + tensor var_1542_pad_0 = const()[name = tensor("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1542_dilations_0 = const()[name = tensor("op_1542_dilations_0"), val = tensor([1, 1])]; + tensor var_1542_groups_0 = const()[name = tensor("op_1542_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108983872))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108947520))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1542_cast_fp16 = conv(dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1542_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1536_cast_fp16, y = var_1542_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1552_pad_type_0 = const()[name = tensor("op_1552_pad_type_0"), val = tensor("valid")]; + tensor var_1552_strides_0 = const()[name = tensor("op_1552_strides_0"), val = tensor([1, 1])]; + tensor var_1552_pad_0 = const()[name = tensor("op_1552_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1552_dilations_0 = const()[name = tensor("op_1552_dilations_0"), val = tensor([1, 1])]; + tensor var_1552_groups_0 = const()[name = tensor("op_1552_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109188736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110008000))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110008128)))]; + tensor var_1552_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1552_dilations_0, groups = var_1552_groups_0, pad = var_1552_pad_0, pad_type = var_1552_pad_type_0, strides = var_1552_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1558_pad_type_0 = const()[name = tensor("op_1558_pad_type_0"), val = tensor("valid")]; + tensor var_1558_strides_0 = const()[name = tensor("op_1558_strides_0"), val = tensor([1, 1])]; + tensor var_1558_pad_0 = const()[name = tensor("op_1558_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1558_dilations_0 = const()[name = tensor("op_1558_dilations_0"), val = tensor([1, 1])]; + tensor var_1558_groups_0 = const()[name = tensor("op_1558_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110034304))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110010752))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1558_cast_fp16 = conv(dilations = var_1558_dilations_0, groups = var_1558_groups_0, pad = var_1558_pad_0, pad_type = var_1558_pad_type_0, strides = var_1558_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_1558_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1552_cast_fp16, y = var_1558_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1561 = const()[name = tensor("op_1561"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1561, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1564_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1565 = const()[name = tensor("op_1565"), val = tensor([1, 20, 64, -1])]; + tensor var_1566_cast_fp16 = reshape(shape = var_1565, x = key_15_cast_fp16)[name = tensor("op_1566_cast_fp16")]; + tensor mh_w_15_transpose_x_0 = const()[name = tensor("mh_w_15_transpose_x_0"), val = tensor(true)]; + tensor mh_w_15_transpose_y_0 = const()[name = tensor("mh_w_15_transpose_y_0"), val = tensor(false)]; + tensor mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1564_cast_fp16, y = var_1566_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_1569_cast_fp16 = softmax(axis = var_1480, x = mh_w_15_cast_fp16)[name = tensor("op_1569_cast_fp16")]; + tensor var_1570 = const()[name = tensor("op_1570"), val = tensor([1, 20, 64, -1])]; + tensor var_1571_cast_fp16 = reshape(shape = var_1570, x = value_15_cast_fp16)[name = tensor("op_1571_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1571_cast_fp16, y = var_1569_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1574 = const()[name = tensor("op_1574"), val = tensor([1, 1280, 1, -1])]; + tensor input_57_cast_fp16 = reshape(shape = var_1574, x = attn_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_1584_pad_type_0 = const()[name = tensor("op_1584_pad_type_0"), val = tensor("valid")]; + tensor var_1584_strides_0 = const()[name = tensor("op_1584_strides_0"), val = tensor([1, 1])]; + tensor var_1584_pad_0 = const()[name = tensor("op_1584_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1584_dilations_0 = const()[name = tensor("op_1584_dilations_0"), val = tensor([1, 1])]; + tensor var_1584_groups_0 = const()[name = tensor("op_1584_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110239168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111058432))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111058560)))]; + tensor var_1584_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1584_dilations_0, groups = var_1584_groups_0, pad = var_1584_pad_0, pad_type = var_1584_pad_type_0, strides = var_1584_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1590_pad_type_0 = const()[name = tensor("op_1590_pad_type_0"), val = tensor("valid")]; + tensor var_1590_strides_0 = const()[name = tensor("op_1590_strides_0"), val = tensor([1, 1])]; + tensor var_1590_pad_0 = const()[name = tensor("op_1590_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1590_dilations_0 = const()[name = tensor("op_1590_dilations_0"), val = tensor([1, 1])]; + tensor var_1590_groups_0 = const()[name = tensor("op_1590_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111080064))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111061184))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1590_cast_fp16 = conv(dilations = var_1590_dilations_0, groups = var_1590_groups_0, pad = var_1590_pad_0, pad_type = var_1590_pad_type_0, strides = var_1590_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = tensor("op_1590_cast_fp16")]; + tensor obj_31_cast_fp16 = add(x = var_1584_cast_fp16, y = var_1590_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1601_to_fp16 = const()[name = tensor("op_1601_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1601_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111284928)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111287552)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1619_pad_type_0 = const()[name = tensor("op_1619_pad_type_0"), val = tensor("valid")]; + tensor var_1619_strides_0 = const()[name = tensor("op_1619_strides_0"), val = tensor([1, 1])]; + tensor var_1619_pad_0 = const()[name = tensor("op_1619_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1619_dilations_0 = const()[name = tensor("op_1619_dilations_0"), val = tensor([1, 1])]; + tensor var_1619_groups_0 = const()[name = tensor("op_1619_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111290176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114567040))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114567168)))]; + tensor var_1619_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1619_dilations_0, groups = var_1619_groups_0, pad = var_1619_pad_0, pad_type = var_1619_pad_type_0, strides = var_1619_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_1619_cast_fp16")]; + tensor var_1625_pad_type_0 = const()[name = tensor("op_1625_pad_type_0"), val = tensor("valid")]; + tensor var_1625_strides_0 = const()[name = tensor("op_1625_strides_0"), val = tensor([1, 1])]; + tensor var_1625_pad_0 = const()[name = tensor("op_1625_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1625_dilations_0 = const()[name = tensor("op_1625_dilations_0"), val = tensor([1, 1])]; + tensor var_1625_groups_0 = const()[name = tensor("op_1625_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114597440))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114577472))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1625_cast_fp16 = conv(dilations = var_1625_dilations_0, groups = var_1625_groups_0, pad = var_1625_pad_0, pad_type = var_1625_pad_type_0, strides = var_1625_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_1625_cast_fp16")]; + tensor input_61_cast_fp16 = add(x = var_1619_cast_fp16, y = var_1625_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_1636_pad_type_0 = const()[name = tensor("op_1636_pad_type_0"), val = tensor("valid")]; + tensor var_1636_strides_0 = const()[name = tensor("op_1636_strides_0"), val = tensor([1, 1])]; + tensor var_1636_pad_0 = const()[name = tensor("op_1636_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1636_dilations_0 = const()[name = tensor("op_1636_dilations_0"), val = tensor([1, 1])]; + tensor var_1636_groups_0 = const()[name = tensor("op_1636_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115416704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118693568))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118693696)))]; + tensor var_1636_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1636_dilations_0, groups = var_1636_groups_0, pad = var_1636_pad_0, pad_type = var_1636_pad_type_0, strides = var_1636_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_1636_cast_fp16")]; + tensor var_1642_pad_type_0 = const()[name = tensor("op_1642_pad_type_0"), val = tensor("valid")]; + tensor var_1642_strides_0 = const()[name = tensor("op_1642_strides_0"), val = tensor([1, 1])]; + tensor var_1642_pad_0 = const()[name = tensor("op_1642_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1642_dilations_0 = const()[name = tensor("op_1642_dilations_0"), val = tensor([1, 1])]; + tensor var_1642_groups_0 = const()[name = tensor("op_1642_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118968000))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118696320))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1642_cast_fp16 = conv(dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_1642_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_1636_cast_fp16, y = var_1642_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_1652 = const()[name = tensor("op_1652"), val = tensor(3)]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1671_to_fp16 = const()[name = tensor("op_1671_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1671_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119787264)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119789888)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor var_1693_pad_type_0 = const()[name = tensor("op_1693_pad_type_0"), val = tensor("valid")]; + tensor var_1693_strides_0 = const()[name = tensor("op_1693_strides_0"), val = tensor([1, 1])]; + tensor var_1693_pad_0 = const()[name = tensor("op_1693_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1693_dilations_0 = const()[name = tensor("op_1693_dilations_0"), val = tensor([1, 1])]; + tensor var_1693_groups_0 = const()[name = tensor("op_1693_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119792512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120611776))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120611904)))]; + tensor var_1693_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1693_dilations_0, groups = var_1693_groups_0, pad = var_1693_pad_0, pad_type = var_1693_pad_type_0, strides = var_1693_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1699_pad_type_0 = const()[name = tensor("op_1699_pad_type_0"), val = tensor("valid")]; + tensor var_1699_strides_0 = const()[name = tensor("op_1699_strides_0"), val = tensor([1, 1])]; + tensor var_1699_pad_0 = const()[name = tensor("op_1699_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1699_dilations_0 = const()[name = tensor("op_1699_dilations_0"), val = tensor([1, 1])]; + tensor var_1699_groups_0 = const()[name = tensor("op_1699_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120662848))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120614528))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1699_cast_fp16 = conv(dilations = var_1699_dilations_0, groups = var_1699_groups_0, pad = var_1699_pad_0, pad_type = var_1699_pad_type_0, strides = var_1699_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1699_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1693_cast_fp16, y = var_1699_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1708_pad_type_0 = const()[name = tensor("op_1708_pad_type_0"), val = tensor("valid")]; + tensor var_1708_strides_0 = const()[name = tensor("op_1708_strides_0"), val = tensor([1, 1])]; + tensor var_1708_pad_0 = const()[name = tensor("op_1708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1708_dilations_0 = const()[name = tensor("op_1708_dilations_0"), val = tensor([1, 1])]; + tensor var_1708_groups_0 = const()[name = tensor("op_1708_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120867712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121686976))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1708_cast_fp16 = conv(dilations = var_1708_dilations_0, groups = var_1708_groups_0, pad = var_1708_pad_0, pad_type = var_1708_pad_type_0, strides = var_1708_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1708_cast_fp16")]; + tensor var_1714_pad_type_0 = const()[name = tensor("op_1714_pad_type_0"), val = tensor("valid")]; + tensor var_1714_strides_0 = const()[name = tensor("op_1714_strides_0"), val = tensor([1, 1])]; + tensor var_1714_pad_0 = const()[name = tensor("op_1714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1714_dilations_0 = const()[name = tensor("op_1714_dilations_0"), val = tensor([1, 1])]; + tensor var_1714_groups_0 = const()[name = tensor("op_1714_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121721856))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121687104))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1714_cast_fp16 = conv(dilations = var_1714_dilations_0, groups = var_1714_groups_0, pad = var_1714_pad_0, pad_type = var_1714_pad_type_0, strides = var_1714_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1714_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1708_cast_fp16, y = var_1714_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1724_pad_type_0 = const()[name = tensor("op_1724_pad_type_0"), val = tensor("valid")]; + tensor var_1724_strides_0 = const()[name = tensor("op_1724_strides_0"), val = tensor([1, 1])]; + tensor var_1724_pad_0 = const()[name = tensor("op_1724_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1724_dilations_0 = const()[name = tensor("op_1724_dilations_0"), val = tensor([1, 1])]; + tensor var_1724_groups_0 = const()[name = tensor("op_1724_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121926720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122745984))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122746112)))]; + tensor var_1724_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1724_dilations_0, groups = var_1724_groups_0, pad = var_1724_pad_0, pad_type = var_1724_pad_type_0, strides = var_1724_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_1724_cast_fp16")]; + tensor var_1730_pad_type_0 = const()[name = tensor("op_1730_pad_type_0"), val = tensor("valid")]; + tensor var_1730_strides_0 = const()[name = tensor("op_1730_strides_0"), val = tensor([1, 1])]; + tensor var_1730_pad_0 = const()[name = tensor("op_1730_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1730_dilations_0 = const()[name = tensor("op_1730_dilations_0"), val = tensor([1, 1])]; + tensor var_1730_groups_0 = const()[name = tensor("op_1730_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122771968))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122748736))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1730_cast_fp16 = conv(dilations = var_1730_dilations_0, groups = var_1730_groups_0, pad = var_1730_pad_0, pad_type = var_1730_pad_type_0, strides = var_1730_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_1730_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1724_cast_fp16, y = var_1730_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1733 = const()[name = tensor("op_1733"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1733, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1735_to_fp16 = const()[name = tensor("op_1735_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1736_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1735_to_fp16)[name = tensor("op_1736_cast_fp16")]; + tensor var_1737 = const()[name = tensor("op_1737"), val = tensor([1, 20, 64, -1])]; + tensor var_1738_cast_fp16 = reshape(shape = var_1737, x = key_17_cast_fp16)[name = tensor("op_1738_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1736_cast_fp16, y = var_1738_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor var_1741_cast_fp16 = softmax(axis = var_1652, x = mh_w_17_cast_fp16)[name = tensor("op_1741_cast_fp16")]; + tensor var_1742 = const()[name = tensor("op_1742"), val = tensor([1, 20, 64, -1])]; + tensor var_1743_cast_fp16 = reshape(shape = var_1742, x = value_17_cast_fp16)[name = tensor("op_1743_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1743_cast_fp16, y = var_1741_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1746 = const()[name = tensor("op_1746"), val = tensor([1, 1280, 1, -1])]; + tensor input_65_cast_fp16 = reshape(shape = var_1746, x = attn_17_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_1756_pad_type_0 = const()[name = tensor("op_1756_pad_type_0"), val = tensor("valid")]; + tensor var_1756_strides_0 = const()[name = tensor("op_1756_strides_0"), val = tensor([1, 1])]; + tensor var_1756_pad_0 = const()[name = tensor("op_1756_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1756_dilations_0 = const()[name = tensor("op_1756_dilations_0"), val = tensor([1, 1])]; + tensor var_1756_groups_0 = const()[name = tensor("op_1756_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122976832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123796096))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123796224)))]; + tensor var_1756_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1756_dilations_0, groups = var_1756_groups_0, pad = var_1756_pad_0, pad_type = var_1756_pad_type_0, strides = var_1756_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_1756_cast_fp16")]; + tensor var_1762_pad_type_0 = const()[name = tensor("op_1762_pad_type_0"), val = tensor("valid")]; + tensor var_1762_strides_0 = const()[name = tensor("op_1762_strides_0"), val = tensor([1, 1])]; + tensor var_1762_pad_0 = const()[name = tensor("op_1762_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1762_dilations_0 = const()[name = tensor("op_1762_dilations_0"), val = tensor([1, 1])]; + tensor var_1762_groups_0 = const()[name = tensor("op_1762_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123819136))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123798848))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1762_cast_fp16 = conv(dilations = var_1762_dilations_0, groups = var_1762_groups_0, pad = var_1762_pad_0, pad_type = var_1762_pad_type_0, strides = var_1762_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_1762_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_1756_cast_fp16, y = var_1762_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1773_to_fp16 = const()[name = tensor("op_1773_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1773_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124024000)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124026624)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_1791_pad_type_0 = const()[name = tensor("op_1791_pad_type_0"), val = tensor("valid")]; + tensor var_1791_strides_0 = const()[name = tensor("op_1791_strides_0"), val = tensor([1, 1])]; + tensor var_1791_pad_0 = const()[name = tensor("op_1791_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1791_dilations_0 = const()[name = tensor("op_1791_dilations_0"), val = tensor([1, 1])]; + tensor var_1791_groups_0 = const()[name = tensor("op_1791_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124029248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127306112))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127306240)))]; + tensor var_1791_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1791_dilations_0, groups = var_1791_groups_0, pad = var_1791_pad_0, pad_type = var_1791_pad_type_0, strides = var_1791_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("op_1791_cast_fp16")]; + tensor var_1797_pad_type_0 = const()[name = tensor("op_1797_pad_type_0"), val = tensor("valid")]; + tensor var_1797_strides_0 = const()[name = tensor("op_1797_strides_0"), val = tensor([1, 1])]; + tensor var_1797_pad_0 = const()[name = tensor("op_1797_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1797_dilations_0 = const()[name = tensor("op_1797_dilations_0"), val = tensor([1, 1])]; + tensor var_1797_groups_0 = const()[name = tensor("op_1797_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127346368))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127316544))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1797_cast_fp16 = conv(dilations = var_1797_dilations_0, groups = var_1797_groups_0, pad = var_1797_pad_0, pad_type = var_1797_pad_type_0, strides = var_1797_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = tensor("op_1797_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_1791_cast_fp16, y = var_1797_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_1808_pad_type_0 = const()[name = tensor("op_1808_pad_type_0"), val = tensor("valid")]; + tensor var_1808_strides_0 = const()[name = tensor("op_1808_strides_0"), val = tensor([1, 1])]; + tensor var_1808_pad_0 = const()[name = tensor("op_1808_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1808_dilations_0 = const()[name = tensor("op_1808_dilations_0"), val = tensor([1, 1])]; + tensor var_1808_groups_0 = const()[name = tensor("op_1808_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128165632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131442496))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131442624)))]; + tensor var_1808_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1808_dilations_0, groups = var_1808_groups_0, pad = var_1808_pad_0, pad_type = var_1808_pad_type_0, strides = var_1808_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_1808_cast_fp16")]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("valid")]; + tensor var_1814_strides_0 = const()[name = tensor("op_1814_strides_0"), val = tensor([1, 1])]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_dilations_0 = const()[name = tensor("op_1814_dilations_0"), val = tensor([1, 1])]; + tensor var_1814_groups_0 = const()[name = tensor("op_1814_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131699584))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131445248))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1814_dilations_0, groups = var_1814_groups_0, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1814_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_1808_cast_fp16, y = var_1814_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1824 = const()[name = tensor("op_1824"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1843_to_fp16 = const()[name = tensor("op_1843_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1843_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132518848)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132521472)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_1865_pad_type_0 = const()[name = tensor("op_1865_pad_type_0"), val = tensor("valid")]; + tensor var_1865_strides_0 = const()[name = tensor("op_1865_strides_0"), val = tensor([1, 1])]; + tensor var_1865_pad_0 = const()[name = tensor("op_1865_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1865_dilations_0 = const()[name = tensor("op_1865_dilations_0"), val = tensor([1, 1])]; + tensor var_1865_groups_0 = const()[name = tensor("op_1865_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132524096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133343360))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133343488)))]; + tensor var_1865_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1865_dilations_0, groups = var_1865_groups_0, pad = var_1865_pad_0, pad_type = var_1865_pad_type_0, strides = var_1865_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1865_cast_fp16")]; + tensor var_1871_pad_type_0 = const()[name = tensor("op_1871_pad_type_0"), val = tensor("valid")]; + tensor var_1871_strides_0 = const()[name = tensor("op_1871_strides_0"), val = tensor([1, 1])]; + tensor var_1871_pad_0 = const()[name = tensor("op_1871_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1871_dilations_0 = const()[name = tensor("op_1871_dilations_0"), val = tensor([1, 1])]; + tensor var_1871_groups_0 = const()[name = tensor("op_1871_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133392896))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133346112))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1871_cast_fp16 = conv(dilations = var_1871_dilations_0, groups = var_1871_groups_0, pad = var_1871_pad_0, pad_type = var_1871_pad_type_0, strides = var_1871_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1871_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1865_cast_fp16, y = var_1871_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1880_pad_type_0 = const()[name = tensor("op_1880_pad_type_0"), val = tensor("valid")]; + tensor var_1880_strides_0 = const()[name = tensor("op_1880_strides_0"), val = tensor([1, 1])]; + tensor var_1880_pad_0 = const()[name = tensor("op_1880_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1880_dilations_0 = const()[name = tensor("op_1880_dilations_0"), val = tensor([1, 1])]; + tensor var_1880_groups_0 = const()[name = tensor("op_1880_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133597760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134417024))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1880_cast_fp16 = conv(dilations = var_1880_dilations_0, groups = var_1880_groups_0, pad = var_1880_pad_0, pad_type = var_1880_pad_type_0, strides = var_1880_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1880_cast_fp16")]; + tensor var_1886_pad_type_0 = const()[name = tensor("op_1886_pad_type_0"), val = tensor("valid")]; + tensor var_1886_strides_0 = const()[name = tensor("op_1886_strides_0"), val = tensor([1, 1])]; + tensor var_1886_pad_0 = const()[name = tensor("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1886_dilations_0 = const()[name = tensor("op_1886_dilations_0"), val = tensor([1, 1])]; + tensor var_1886_groups_0 = const()[name = tensor("op_1886_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134451584))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134417152))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1886_cast_fp16 = conv(dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1886_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1896_pad_type_0 = const()[name = tensor("op_1896_pad_type_0"), val = tensor("valid")]; + tensor var_1896_strides_0 = const()[name = tensor("op_1896_strides_0"), val = tensor([1, 1])]; + tensor var_1896_pad_0 = const()[name = tensor("op_1896_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1896_dilations_0 = const()[name = tensor("op_1896_dilations_0"), val = tensor([1, 1])]; + tensor var_1896_groups_0 = const()[name = tensor("op_1896_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134656448))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135475712))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135475840)))]; + tensor var_1896_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1896_dilations_0, groups = var_1896_groups_0, pad = var_1896_pad_0, pad_type = var_1896_pad_type_0, strides = var_1896_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_1896_cast_fp16")]; + tensor var_1902_pad_type_0 = const()[name = tensor("op_1902_pad_type_0"), val = tensor("valid")]; + tensor var_1902_strides_0 = const()[name = tensor("op_1902_strides_0"), val = tensor([1, 1])]; + tensor var_1902_pad_0 = const()[name = tensor("op_1902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1902_dilations_0 = const()[name = tensor("op_1902_dilations_0"), val = tensor([1, 1])]; + tensor var_1902_groups_0 = const()[name = tensor("op_1902_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135500160))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135478464))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1902_cast_fp16 = conv(dilations = var_1902_dilations_0, groups = var_1902_groups_0, pad = var_1902_pad_0, pad_type = var_1902_pad_type_0, strides = var_1902_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_1902_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1896_cast_fp16, y = var_1902_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1905 = const()[name = tensor("op_1905"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1905, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1907_to_fp16 = const()[name = tensor("op_1907_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1908_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1907_to_fp16)[name = tensor("op_1908_cast_fp16")]; + tensor var_1909 = const()[name = tensor("op_1909"), val = tensor([1, 20, 64, -1])]; + tensor var_1910_cast_fp16 = reshape(shape = var_1909, x = key_19_cast_fp16)[name = tensor("op_1910_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1908_cast_fp16, y = var_1910_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor var_1913_cast_fp16 = softmax(axis = var_1824, x = mh_w_19_cast_fp16)[name = tensor("op_1913_cast_fp16")]; + tensor var_1914 = const()[name = tensor("op_1914"), val = tensor([1, 20, 64, -1])]; + tensor var_1915_cast_fp16 = reshape(shape = var_1914, x = value_19_cast_fp16)[name = tensor("op_1915_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1915_cast_fp16, y = var_1913_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1918 = const()[name = tensor("op_1918"), val = tensor([1, 1280, 1, -1])]; + tensor input_73_cast_fp16 = reshape(shape = var_1918, x = attn_19_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_1928_pad_type_0 = const()[name = tensor("op_1928_pad_type_0"), val = tensor("valid")]; + tensor var_1928_strides_0 = const()[name = tensor("op_1928_strides_0"), val = tensor([1, 1])]; + tensor var_1928_pad_0 = const()[name = tensor("op_1928_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1928_dilations_0 = const()[name = tensor("op_1928_dilations_0"), val = tensor([1, 1])]; + tensor var_1928_groups_0 = const()[name = tensor("op_1928_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135705024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136524288))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136524416)))]; + tensor var_1928_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1928_dilations_0, groups = var_1928_groups_0, pad = var_1928_pad_0, pad_type = var_1928_pad_type_0, strides = var_1928_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_1928_cast_fp16")]; + tensor var_1934_pad_type_0 = const()[name = tensor("op_1934_pad_type_0"), val = tensor("valid")]; + tensor var_1934_strides_0 = const()[name = tensor("op_1934_strides_0"), val = tensor([1, 1])]; + tensor var_1934_pad_0 = const()[name = tensor("op_1934_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1934_dilations_0 = const()[name = tensor("op_1934_dilations_0"), val = tensor([1, 1])]; + tensor var_1934_groups_0 = const()[name = tensor("op_1934_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136545344))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136527040))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_1934_cast_fp16 = conv(dilations = var_1934_dilations_0, groups = var_1934_groups_0, pad = var_1934_pad_0, pad_type = var_1934_pad_type_0, strides = var_1934_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_1934_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_1928_cast_fp16, y = var_1934_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_1945_to_fp16 = const()[name = tensor("op_1945_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1945_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136750208)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136752832)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_1963_pad_type_0 = const()[name = tensor("op_1963_pad_type_0"), val = tensor("valid")]; + tensor var_1963_strides_0 = const()[name = tensor("op_1963_strides_0"), val = tensor([1, 1])]; + tensor var_1963_pad_0 = const()[name = tensor("op_1963_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1963_dilations_0 = const()[name = tensor("op_1963_dilations_0"), val = tensor([1, 1])]; + tensor var_1963_groups_0 = const()[name = tensor("op_1963_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136755456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140032320))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140032448)))]; + tensor var_1963_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1963_dilations_0, groups = var_1963_groups_0, pad = var_1963_pad_0, pad_type = var_1963_pad_type_0, strides = var_1963_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_1963_cast_fp16")]; + tensor var_1969_pad_type_0 = const()[name = tensor("op_1969_pad_type_0"), val = tensor("valid")]; + tensor var_1969_strides_0 = const()[name = tensor("op_1969_strides_0"), val = tensor([1, 1])]; + tensor var_1969_pad_0 = const()[name = tensor("op_1969_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1969_dilations_0 = const()[name = tensor("op_1969_dilations_0"), val = tensor([1, 1])]; + tensor var_1969_groups_0 = const()[name = tensor("op_1969_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140073152))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140042752))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_1969_cast_fp16 = conv(dilations = var_1969_dilations_0, groups = var_1969_groups_0, pad = var_1969_pad_0, pad_type = var_1969_pad_type_0, strides = var_1969_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_1969_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_1963_cast_fp16, y = var_1969_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_1980_pad_type_0 = const()[name = tensor("op_1980_pad_type_0"), val = tensor("valid")]; + tensor var_1980_strides_0 = const()[name = tensor("op_1980_strides_0"), val = tensor([1, 1])]; + tensor var_1980_pad_0 = const()[name = tensor("op_1980_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1980_dilations_0 = const()[name = tensor("op_1980_dilations_0"), val = tensor([1, 1])]; + tensor var_1980_groups_0 = const()[name = tensor("op_1980_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140892416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144169280))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144169408)))]; + tensor var_1980_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1980_dilations_0, groups = var_1980_groups_0, pad = var_1980_pad_0, pad_type = var_1980_pad_type_0, strides = var_1980_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_1980_cast_fp16")]; + tensor var_1986_pad_type_0 = const()[name = tensor("op_1986_pad_type_0"), val = tensor("valid")]; + tensor var_1986_strides_0 = const()[name = tensor("op_1986_strides_0"), val = tensor([1, 1])]; + tensor var_1986_pad_0 = const()[name = tensor("op_1986_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1986_dilations_0 = const()[name = tensor("op_1986_dilations_0"), val = tensor([1, 1])]; + tensor var_1986_groups_0 = const()[name = tensor("op_1986_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144438144))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144172032))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_1986_cast_fp16 = conv(dilations = var_1986_dilations_0, groups = var_1986_groups_0, pad = var_1986_pad_0, pad_type = var_1986_pad_type_0, strides = var_1986_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_1986_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_1980_cast_fp16, y = var_1986_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_1996 = const()[name = tensor("op_1996"), val = tensor(3)]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_2015_to_fp16 = const()[name = tensor("op_2015_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2015_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145257408)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145260032)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_2037_pad_type_0 = const()[name = tensor("op_2037_pad_type_0"), val = tensor("valid")]; + tensor var_2037_strides_0 = const()[name = tensor("op_2037_strides_0"), val = tensor([1, 1])]; + tensor var_2037_pad_0 = const()[name = tensor("op_2037_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2037_dilations_0 = const()[name = tensor("op_2037_dilations_0"), val = tensor([1, 1])]; + tensor var_2037_groups_0 = const()[name = tensor("op_2037_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145262656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146081920))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146082048)))]; + tensor var_2037_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2037_dilations_0, groups = var_2037_groups_0, pad = var_2037_pad_0, pad_type = var_2037_pad_type_0, strides = var_2037_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2037_cast_fp16")]; + tensor var_2043_pad_type_0 = const()[name = tensor("op_2043_pad_type_0"), val = tensor("valid")]; + tensor var_2043_strides_0 = const()[name = tensor("op_2043_strides_0"), val = tensor([1, 1])]; + tensor var_2043_pad_0 = const()[name = tensor("op_2043_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2043_dilations_0 = const()[name = tensor("op_2043_dilations_0"), val = tensor([1, 1])]; + tensor var_2043_groups_0 = const()[name = tensor("op_2043_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146130624))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146084672))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2043_cast_fp16 = conv(dilations = var_2043_dilations_0, groups = var_2043_groups_0, pad = var_2043_pad_0, pad_type = var_2043_pad_type_0, strides = var_2043_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2043_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_2037_cast_fp16, y = var_2043_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_2052_pad_type_0 = const()[name = tensor("op_2052_pad_type_0"), val = tensor("valid")]; + tensor var_2052_strides_0 = const()[name = tensor("op_2052_strides_0"), val = tensor([1, 1])]; + tensor var_2052_pad_0 = const()[name = tensor("op_2052_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2052_dilations_0 = const()[name = tensor("op_2052_dilations_0"), val = tensor([1, 1])]; + tensor var_2052_groups_0 = const()[name = tensor("op_2052_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146335488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154752))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2052_cast_fp16 = conv(dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2052_cast_fp16")]; + tensor var_2058_pad_type_0 = const()[name = tensor("op_2058_pad_type_0"), val = tensor("valid")]; + tensor var_2058_strides_0 = const()[name = tensor("op_2058_strides_0"), val = tensor([1, 1])]; + tensor var_2058_pad_0 = const()[name = tensor("op_2058_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2058_dilations_0 = const()[name = tensor("op_2058_dilations_0"), val = tensor([1, 1])]; + tensor var_2058_groups_0 = const()[name = tensor("op_2058_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147187392))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154880))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2058_cast_fp16 = conv(dilations = var_2058_dilations_0, groups = var_2058_groups_0, pad = var_2058_pad_0, pad_type = var_2058_pad_type_0, strides = var_2058_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_2052_cast_fp16, y = var_2058_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_2068_pad_type_0 = const()[name = tensor("op_2068_pad_type_0"), val = tensor("valid")]; + tensor var_2068_strides_0 = const()[name = tensor("op_2068_strides_0"), val = tensor([1, 1])]; + tensor var_2068_pad_0 = const()[name = tensor("op_2068_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2068_dilations_0 = const()[name = tensor("op_2068_dilations_0"), val = tensor([1, 1])]; + tensor var_2068_groups_0 = const()[name = tensor("op_2068_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147392256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148211520))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148211648)))]; + tensor var_2068_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2068_dilations_0, groups = var_2068_groups_0, pad = var_2068_pad_0, pad_type = var_2068_pad_type_0, strides = var_2068_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_2068_cast_fp16")]; + tensor var_2074_pad_type_0 = const()[name = tensor("op_2074_pad_type_0"), val = tensor("valid")]; + tensor var_2074_strides_0 = const()[name = tensor("op_2074_strides_0"), val = tensor([1, 1])]; + tensor var_2074_pad_0 = const()[name = tensor("op_2074_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2074_dilations_0 = const()[name = tensor("op_2074_dilations_0"), val = tensor([1, 1])]; + tensor var_2074_groups_0 = const()[name = tensor("op_2074_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148234688))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148214272))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2074_cast_fp16 = conv(dilations = var_2074_dilations_0, groups = var_2074_groups_0, pad = var_2074_pad_0, pad_type = var_2074_pad_type_0, strides = var_2074_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_2068_cast_fp16, y = var_2074_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_2077 = const()[name = tensor("op_2077"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_2077, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_2079_to_fp16 = const()[name = tensor("op_2079_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2080_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2079_to_fp16)[name = tensor("op_2080_cast_fp16")]; + tensor var_2081 = const()[name = tensor("op_2081"), val = tensor([1, 20, 64, -1])]; + tensor var_2082_cast_fp16 = reshape(shape = var_2081, x = key_21_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor mh_w_21_transpose_x_0 = const()[name = tensor("mh_w_21_transpose_x_0"), val = tensor(true)]; + tensor mh_w_21_transpose_y_0 = const()[name = tensor("mh_w_21_transpose_y_0"), val = tensor(false)]; + tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2080_cast_fp16, y = var_2082_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_2085_cast_fp16 = softmax(axis = var_1996, x = mh_w_21_cast_fp16)[name = tensor("op_2085_cast_fp16")]; + tensor var_2086 = const()[name = tensor("op_2086"), val = tensor([1, 20, 64, -1])]; + tensor var_2087_cast_fp16 = reshape(shape = var_2086, x = value_21_cast_fp16)[name = tensor("op_2087_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2087_cast_fp16, y = var_2085_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_2090 = const()[name = tensor("op_2090"), val = tensor([1, 1280, 1, -1])]; + tensor input_81_cast_fp16 = reshape(shape = var_2090, x = attn_21_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2100_pad_type_0 = const()[name = tensor("op_2100_pad_type_0"), val = tensor("valid")]; + tensor var_2100_strides_0 = const()[name = tensor("op_2100_strides_0"), val = tensor([1, 1])]; + tensor var_2100_pad_0 = const()[name = tensor("op_2100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2100_dilations_0 = const()[name = tensor("op_2100_dilations_0"), val = tensor([1, 1])]; + tensor var_2100_groups_0 = const()[name = tensor("op_2100_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148439552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149258816))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149258944)))]; + tensor var_2100_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2100_dilations_0, groups = var_2100_groups_0, pad = var_2100_pad_0, pad_type = var_2100_pad_type_0, strides = var_2100_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_2100_cast_fp16")]; + tensor var_2106_pad_type_0 = const()[name = tensor("op_2106_pad_type_0"), val = tensor("valid")]; + tensor var_2106_strides_0 = const()[name = tensor("op_2106_strides_0"), val = tensor([1, 1])]; + tensor var_2106_pad_0 = const()[name = tensor("op_2106_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2106_dilations_0 = const()[name = tensor("op_2106_dilations_0"), val = tensor([1, 1])]; + tensor var_2106_groups_0 = const()[name = tensor("op_2106_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149280384))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149261568))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2106_cast_fp16 = conv(dilations = var_2106_dilations_0, groups = var_2106_groups_0, pad = var_2106_pad_0, pad_type = var_2106_pad_type_0, strides = var_2106_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor obj_43_cast_fp16 = add(x = var_2100_cast_fp16, y = var_2106_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_2117_to_fp16 = const()[name = tensor("op_2117_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2117_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149485248)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149487872)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2135_pad_type_0 = const()[name = tensor("op_2135_pad_type_0"), val = tensor("valid")]; + tensor var_2135_strides_0 = const()[name = tensor("op_2135_strides_0"), val = tensor([1, 1])]; + tensor var_2135_pad_0 = const()[name = tensor("op_2135_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2135_dilations_0 = const()[name = tensor("op_2135_dilations_0"), val = tensor([1, 1])]; + tensor var_2135_groups_0 = const()[name = tensor("op_2135_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149490496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152767360))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152767488)))]; + tensor var_2135_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2135_dilations_0, groups = var_2135_groups_0, pad = var_2135_pad_0, pad_type = var_2135_pad_type_0, strides = var_2135_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_2135_cast_fp16")]; + tensor var_2141_pad_type_0 = const()[name = tensor("op_2141_pad_type_0"), val = tensor("valid")]; + tensor var_2141_strides_0 = const()[name = tensor("op_2141_strides_0"), val = tensor([1, 1])]; + tensor var_2141_pad_0 = const()[name = tensor("op_2141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2141_dilations_0 = const()[name = tensor("op_2141_dilations_0"), val = tensor([1, 1])]; + tensor var_2141_groups_0 = const()[name = tensor("op_2141_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152808704))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152777792))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_2141_cast_fp16 = conv(dilations = var_2141_dilations_0, groups = var_2141_groups_0, pad = var_2141_pad_0, pad_type = var_2141_pad_type_0, strides = var_2141_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_2141_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = var_2135_cast_fp16, y = var_2141_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_2152_pad_type_0 = const()[name = tensor("op_2152_pad_type_0"), val = tensor("valid")]; + tensor var_2152_strides_0 = const()[name = tensor("op_2152_strides_0"), val = tensor([1, 1])]; + tensor var_2152_pad_0 = const()[name = tensor("op_2152_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2152_dilations_0 = const()[name = tensor("op_2152_dilations_0"), val = tensor([1, 1])]; + tensor var_2152_groups_0 = const()[name = tensor("op_2152_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153627968))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156904832))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156904960)))]; + tensor var_2152_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2152_dilations_0, groups = var_2152_groups_0, pad = var_2152_pad_0, pad_type = var_2152_pad_type_0, strides = var_2152_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = tensor("op_2152_cast_fp16")]; + tensor var_2158_pad_type_0 = const()[name = tensor("op_2158_pad_type_0"), val = tensor("valid")]; + tensor var_2158_strides_0 = const()[name = tensor("op_2158_strides_0"), val = tensor([1, 1])]; + tensor var_2158_pad_0 = const()[name = tensor("op_2158_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2158_dilations_0 = const()[name = tensor("op_2158_dilations_0"), val = tensor([1, 1])]; + tensor var_2158_groups_0 = const()[name = tensor("op_2158_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157157056))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156907584))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_2158_cast_fp16 = conv(dilations = var_2158_dilations_0, groups = var_2158_groups_0, pad = var_2158_pad_0, pad_type = var_2158_pad_type_0, strides = var_2158_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_2152_cast_fp16, y = var_2158_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_2168 = const()[name = tensor("op_2168"), val = tensor(3)]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_2187_to_fp16 = const()[name = tensor("op_2187_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2187_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157976320)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157978944)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor var_2209_pad_type_0 = const()[name = tensor("op_2209_pad_type_0"), val = tensor("valid")]; + tensor var_2209_strides_0 = const()[name = tensor("op_2209_strides_0"), val = tensor([1, 1])]; + tensor var_2209_pad_0 = const()[name = tensor("op_2209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2209_dilations_0 = const()[name = tensor("op_2209_dilations_0"), val = tensor([1, 1])]; + tensor var_2209_groups_0 = const()[name = tensor("op_2209_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157981568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158800832))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158800960)))]; + tensor var_2209_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2209_dilations_0, groups = var_2209_groups_0, pad = var_2209_pad_0, pad_type = var_2209_pad_type_0, strides = var_2209_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2209_cast_fp16")]; + tensor var_2215_pad_type_0 = const()[name = tensor("op_2215_pad_type_0"), val = tensor("valid")]; + tensor var_2215_strides_0 = const()[name = tensor("op_2215_strides_0"), val = tensor([1, 1])]; + tensor var_2215_pad_0 = const()[name = tensor("op_2215_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2215_dilations_0 = const()[name = tensor("op_2215_dilations_0"), val = tensor([1, 1])]; + tensor var_2215_groups_0 = const()[name = tensor("op_2215_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158852864))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158803584))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2215_cast_fp16 = conv(dilations = var_2215_dilations_0, groups = var_2215_groups_0, pad = var_2215_pad_0, pad_type = var_2215_pad_type_0, strides = var_2215_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2215_cast_fp16")]; + tensor query_23_cast_fp16 = add(x = var_2209_cast_fp16, y = var_2215_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_2224_pad_type_0 = const()[name = tensor("op_2224_pad_type_0"), val = tensor("valid")]; + tensor var_2224_strides_0 = const()[name = tensor("op_2224_strides_0"), val = tensor([1, 1])]; + tensor var_2224_pad_0 = const()[name = tensor("op_2224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2224_dilations_0 = const()[name = tensor("op_2224_dilations_0"), val = tensor([1, 1])]; + tensor var_2224_groups_0 = const()[name = tensor("op_2224_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159057728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159876992))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2224_cast_fp16 = conv(dilations = var_2224_dilations_0, groups = var_2224_groups_0, pad = var_2224_pad_0, pad_type = var_2224_pad_type_0, strides = var_2224_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2230_pad_type_0 = const()[name = tensor("op_2230_pad_type_0"), val = tensor("valid")]; + tensor var_2230_strides_0 = const()[name = tensor("op_2230_strides_0"), val = tensor([1, 1])]; + tensor var_2230_pad_0 = const()[name = tensor("op_2230_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2230_dilations_0 = const()[name = tensor("op_2230_dilations_0"), val = tensor([1, 1])]; + tensor var_2230_groups_0 = const()[name = tensor("op_2230_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159906048))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159877120))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2230_cast_fp16 = conv(dilations = var_2230_dilations_0, groups = var_2230_groups_0, pad = var_2230_pad_0, pad_type = var_2230_pad_type_0, strides = var_2230_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2230_cast_fp16")]; + tensor key_23_cast_fp16 = add(x = var_2224_cast_fp16, y = var_2230_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor var_2240_pad_type_0 = const()[name = tensor("op_2240_pad_type_0"), val = tensor("valid")]; + tensor var_2240_strides_0 = const()[name = tensor("op_2240_strides_0"), val = tensor([1, 1])]; + tensor var_2240_pad_0 = const()[name = tensor("op_2240_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2240_dilations_0 = const()[name = tensor("op_2240_dilations_0"), val = tensor([1, 1])]; + tensor var_2240_groups_0 = const()[name = tensor("op_2240_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160110912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160930176))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160930304)))]; + tensor var_2240_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2240_dilations_0, groups = var_2240_groups_0, pad = var_2240_pad_0, pad_type = var_2240_pad_type_0, strides = var_2240_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2246_pad_type_0 = const()[name = tensor("op_2246_pad_type_0"), val = tensor("valid")]; + tensor var_2246_strides_0 = const()[name = tensor("op_2246_strides_0"), val = tensor([1, 1])]; + tensor var_2246_pad_0 = const()[name = tensor("op_2246_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2246_dilations_0 = const()[name = tensor("op_2246_dilations_0"), val = tensor([1, 1])]; + tensor var_2246_groups_0 = const()[name = tensor("op_2246_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160953216))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160932928))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2246_cast_fp16 = conv(dilations = var_2246_dilations_0, groups = var_2246_groups_0, pad = var_2246_pad_0, pad_type = var_2246_pad_type_0, strides = var_2246_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_2246_cast_fp16")]; + tensor value_23_cast_fp16 = add(x = var_2240_cast_fp16, y = var_2246_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_2249 = const()[name = tensor("op_2249"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_2249, x = query_23_cast_fp16)[name = tensor("mh_q_23_cast_fp16")]; + tensor var_2251_to_fp16 = const()[name = tensor("op_2251_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2252_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_2251_to_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2253 = const()[name = tensor("op_2253"), val = tensor([1, 20, 64, -1])]; + tensor var_2254_cast_fp16 = reshape(shape = var_2253, x = key_23_cast_fp16)[name = tensor("op_2254_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_2252_cast_fp16, y = var_2254_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor var_2257_cast_fp16 = softmax(axis = var_2168, x = mh_w_23_cast_fp16)[name = tensor("op_2257_cast_fp16")]; + tensor var_2258 = const()[name = tensor("op_2258"), val = tensor([1, 20, 64, -1])]; + tensor var_2259_cast_fp16 = reshape(shape = var_2258, x = value_23_cast_fp16)[name = tensor("op_2259_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_2259_cast_fp16, y = var_2257_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_2262 = const()[name = tensor("op_2262"), val = tensor([1, 1280, 1, -1])]; + tensor input_89_cast_fp16 = reshape(shape = var_2262, x = attn_23_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2272_pad_type_0 = const()[name = tensor("op_2272_pad_type_0"), val = tensor("valid")]; + tensor var_2272_strides_0 = const()[name = tensor("op_2272_strides_0"), val = tensor([1, 1])]; + tensor var_2272_pad_0 = const()[name = tensor("op_2272_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2272_dilations_0 = const()[name = tensor("op_2272_dilations_0"), val = tensor([1, 1])]; + tensor var_2272_groups_0 = const()[name = tensor("op_2272_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161158080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161977344))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161977472)))]; + tensor var_2272_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2272_dilations_0, groups = var_2272_groups_0, pad = var_2272_pad_0, pad_type = var_2272_pad_type_0, strides = var_2272_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2278_pad_type_0 = const()[name = tensor("op_2278_pad_type_0"), val = tensor("valid")]; + tensor var_2278_strides_0 = const()[name = tensor("op_2278_strides_0"), val = tensor([1, 1])]; + tensor var_2278_pad_0 = const()[name = tensor("op_2278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2278_dilations_0 = const()[name = tensor("op_2278_dilations_0"), val = tensor([1, 1])]; + tensor var_2278_groups_0 = const()[name = tensor("op_2278_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161997632))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161980096))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2278_cast_fp16 = conv(dilations = var_2278_dilations_0, groups = var_2278_groups_0, pad = var_2278_pad_0, pad_type = var_2278_pad_type_0, strides = var_2278_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_2278_cast_fp16")]; + tensor obj_47_cast_fp16 = add(x = var_2272_cast_fp16, y = var_2278_cast_fp16)[name = tensor("obj_47_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_47_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_2289_to_fp16 = const()[name = tensor("op_2289_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2289_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162202496)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162205120)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_2307_pad_type_0 = const()[name = tensor("op_2307_pad_type_0"), val = tensor("valid")]; + tensor var_2307_strides_0 = const()[name = tensor("op_2307_strides_0"), val = tensor([1, 1])]; + tensor var_2307_pad_0 = const()[name = tensor("op_2307_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2307_dilations_0 = const()[name = tensor("op_2307_dilations_0"), val = tensor([1, 1])]; + tensor var_2307_groups_0 = const()[name = tensor("op_2307_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162207744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165484608))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165484736)))]; + tensor var_2307_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2307_dilations_0, groups = var_2307_groups_0, pad = var_2307_pad_0, pad_type = var_2307_pad_type_0, strides = var_2307_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_2307_cast_fp16")]; + tensor var_2313_pad_type_0 = const()[name = tensor("op_2313_pad_type_0"), val = tensor("valid")]; + tensor var_2313_strides_0 = const()[name = tensor("op_2313_strides_0"), val = tensor([1, 1])]; + tensor var_2313_pad_0 = const()[name = tensor("op_2313_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2313_dilations_0 = const()[name = tensor("op_2313_dilations_0"), val = tensor([1, 1])]; + tensor var_2313_groups_0 = const()[name = tensor("op_2313_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165526016))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165495040))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_2313_cast_fp16 = conv(dilations = var_2313_dilations_0, groups = var_2313_groups_0, pad = var_2313_pad_0, pad_type = var_2313_pad_type_0, strides = var_2313_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_2313_cast_fp16")]; + tensor input_93_cast_fp16 = add(x = var_2307_cast_fp16, y = var_2313_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; + tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_2324_pad_type_0 = const()[name = tensor("op_2324_pad_type_0"), val = tensor("valid")]; + tensor var_2324_strides_0 = const()[name = tensor("op_2324_strides_0"), val = tensor([1, 1])]; + tensor var_2324_pad_0 = const()[name = tensor("op_2324_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2324_dilations_0 = const()[name = tensor("op_2324_dilations_0"), val = tensor([1, 1])]; + tensor var_2324_groups_0 = const()[name = tensor("op_2324_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166345280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169622144))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169622272)))]; + tensor var_2324_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2324_dilations_0, groups = var_2324_groups_0, pad = var_2324_pad_0, pad_type = var_2324_pad_type_0, strides = var_2324_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("op_2324_cast_fp16")]; + tensor var_2330_pad_type_0 = const()[name = tensor("op_2330_pad_type_0"), val = tensor("valid")]; + tensor var_2330_strides_0 = const()[name = tensor("op_2330_strides_0"), val = tensor([1, 1])]; + tensor var_2330_pad_0 = const()[name = tensor("op_2330_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2330_dilations_0 = const()[name = tensor("op_2330_dilations_0"), val = tensor([1, 1])]; + tensor var_2330_groups_0 = const()[name = tensor("op_2330_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169852352))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169624896))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_2330_cast_fp16 = conv(dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor hidden_states_27_cast_fp16 = add(x = var_2324_cast_fp16, y = var_2330_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_27_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_2340 = const()[name = tensor("op_2340"), val = tensor(3)]; + tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; + tensor var_2359_to_fp16 = const()[name = tensor("op_2359_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2359_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_49_gamma_0_to_fp16 = const()[name = tensor("obj_49_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170671616)))]; + tensor obj_49_beta_0_to_fp16 = const()[name = tensor("obj_49_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170674240)))]; + tensor obj_49_epsilon_0_to_fp16 = const()[name = tensor("obj_49_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_49_cast_fp16 = batch_norm(beta = obj_49_beta_0_to_fp16, epsilon = obj_49_epsilon_0_to_fp16, gamma = obj_49_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor var_2381_pad_type_0 = const()[name = tensor("op_2381_pad_type_0"), val = tensor("valid")]; + tensor var_2381_strides_0 = const()[name = tensor("op_2381_strides_0"), val = tensor([1, 1])]; + tensor var_2381_pad_0 = const()[name = tensor("op_2381_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2381_dilations_0 = const()[name = tensor("op_2381_dilations_0"), val = tensor([1, 1])]; + tensor var_2381_groups_0 = const()[name = tensor("op_2381_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170676864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171496128))), name = tensor("layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_12_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171496256)))]; + tensor var_2381_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2381_dilations_0, groups = var_2381_groups_0, pad = var_2381_pad_0, pad_type = var_2381_pad_type_0, strides = var_2381_strides_0, weight = layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_2381_cast_fp16")]; + tensor var_2387_pad_type_0 = const()[name = tensor("op_2387_pad_type_0"), val = tensor("valid")]; + tensor var_2387_strides_0 = const()[name = tensor("op_2387_strides_0"), val = tensor([1, 1])]; + tensor var_2387_pad_0 = const()[name = tensor("op_2387_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2387_dilations_0 = const()[name = tensor("op_2387_dilations_0"), val = tensor([1, 1])]; + tensor var_2387_groups_0 = const()[name = tensor("op_2387_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171547328))), name = tensor("layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171498880))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2387_cast_fp16 = conv(dilations = var_2387_dilations_0, groups = var_2387_groups_0, pad = var_2387_pad_0, pad_type = var_2387_pad_type_0, strides = var_2387_strides_0, weight = layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_2387_cast_fp16")]; + tensor query_25_cast_fp16 = add(x = var_2381_cast_fp16, y = var_2387_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_2396_pad_type_0 = const()[name = tensor("op_2396_pad_type_0"), val = tensor("valid")]; + tensor var_2396_strides_0 = const()[name = tensor("op_2396_strides_0"), val = tensor([1, 1])]; + tensor var_2396_pad_0 = const()[name = tensor("op_2396_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2396_dilations_0 = const()[name = tensor("op_2396_dilations_0"), val = tensor([1, 1])]; + tensor var_2396_groups_0 = const()[name = tensor("op_2396_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171752192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172571456))), name = tensor("layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2396_cast_fp16 = conv(dilations = var_2396_dilations_0, groups = var_2396_groups_0, pad = var_2396_pad_0, pad_type = var_2396_pad_type_0, strides = var_2396_strides_0, weight = layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_2396_cast_fp16")]; + tensor var_2402_pad_type_0 = const()[name = tensor("op_2402_pad_type_0"), val = tensor("valid")]; + tensor var_2402_strides_0 = const()[name = tensor("op_2402_strides_0"), val = tensor([1, 1])]; + tensor var_2402_pad_0 = const()[name = tensor("op_2402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2402_dilations_0 = const()[name = tensor("op_2402_dilations_0"), val = tensor([1, 1])]; + tensor var_2402_groups_0 = const()[name = tensor("op_2402_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172602112))), name = tensor("layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172571584))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2402_cast_fp16 = conv(dilations = var_2402_dilations_0, groups = var_2402_groups_0, pad = var_2402_pad_0, pad_type = var_2402_pad_type_0, strides = var_2402_strides_0, weight = layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor key_25_cast_fp16 = add(x = var_2396_cast_fp16, y = var_2402_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_2412_pad_type_0 = const()[name = tensor("op_2412_pad_type_0"), val = tensor("valid")]; + tensor var_2412_strides_0 = const()[name = tensor("op_2412_strides_0"), val = tensor([1, 1])]; + tensor var_2412_pad_0 = const()[name = tensor("op_2412_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2412_dilations_0 = const()[name = tensor("op_2412_dilations_0"), val = tensor([1, 1])]; + tensor var_2412_groups_0 = const()[name = tensor("op_2412_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172806976))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173626240))), name = tensor("layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_12_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173626368)))]; + tensor var_2412_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2412_dilations_0, groups = var_2412_groups_0, pad = var_2412_pad_0, pad_type = var_2412_pad_type_0, strides = var_2412_strides_0, weight = layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_2412_cast_fp16")]; + tensor var_2418_pad_type_0 = const()[name = tensor("op_2418_pad_type_0"), val = tensor("valid")]; + tensor var_2418_strides_0 = const()[name = tensor("op_2418_strides_0"), val = tensor([1, 1])]; + tensor var_2418_pad_0 = const()[name = tensor("op_2418_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2418_dilations_0 = const()[name = tensor("op_2418_dilations_0"), val = tensor([1, 1])]; + tensor var_2418_groups_0 = const()[name = tensor("op_2418_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173648128))), name = tensor("layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173628992))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2418_cast_fp16 = conv(dilations = var_2418_dilations_0, groups = var_2418_groups_0, pad = var_2418_pad_0, pad_type = var_2418_pad_type_0, strides = var_2418_strides_0, weight = layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_2418_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_2412_cast_fp16, y = var_2418_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_2421 = const()[name = tensor("op_2421"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_2421, x = query_25_cast_fp16)[name = tensor("mh_q_25_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2424_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2425 = const()[name = tensor("op_2425"), val = tensor([1, 20, 64, -1])]; + tensor var_2426_cast_fp16 = reshape(shape = var_2425, x = key_25_cast_fp16)[name = tensor("op_2426_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_2424_cast_fp16, y = var_2426_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor var_2429_cast_fp16 = softmax(axis = var_2340, x = mh_w_25_cast_fp16)[name = tensor("op_2429_cast_fp16")]; + tensor var_2430 = const()[name = tensor("op_2430"), val = tensor([1, 20, 64, -1])]; + tensor var_2431_cast_fp16 = reshape(shape = var_2430, x = value_25_cast_fp16)[name = tensor("op_2431_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_2431_cast_fp16, y = var_2429_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_2434 = const()[name = tensor("op_2434"), val = tensor([1, 1280, 1, -1])]; + tensor input_97_cast_fp16 = reshape(shape = var_2434, x = attn_25_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor var_2444_pad_type_0 = const()[name = tensor("op_2444_pad_type_0"), val = tensor("valid")]; + tensor var_2444_strides_0 = const()[name = tensor("op_2444_strides_0"), val = tensor([1, 1])]; + tensor var_2444_pad_0 = const()[name = tensor("op_2444_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2444_dilations_0 = const()[name = tensor("op_2444_dilations_0"), val = tensor([1, 1])]; + tensor var_2444_groups_0 = const()[name = tensor("op_2444_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173852992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174672256))), name = tensor("layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_12_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174672384)))]; + tensor var_2444_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2444_dilations_0, groups = var_2444_groups_0, pad = var_2444_pad_0, pad_type = var_2444_pad_type_0, strides = var_2444_strides_0, weight = layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_97_cast_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2450_pad_type_0 = const()[name = tensor("op_2450_pad_type_0"), val = tensor("valid")]; + tensor var_2450_strides_0 = const()[name = tensor("op_2450_strides_0"), val = tensor([1, 1])]; + tensor var_2450_pad_0 = const()[name = tensor("op_2450_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2450_dilations_0 = const()[name = tensor("op_2450_dilations_0"), val = tensor([1, 1])]; + tensor var_2450_groups_0 = const()[name = tensor("op_2450_groups_0"), val = tensor(1)]; + tensor layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174690880))), name = tensor("layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174675008))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2450_cast_fp16 = conv(dilations = var_2450_dilations_0, groups = var_2450_groups_0, pad = var_2450_pad_0, pad_type = var_2450_pad_type_0, strides = var_2450_strides_0, weight = layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_97_cast_fp16)[name = tensor("op_2450_cast_fp16")]; + tensor obj_51_cast_fp16 = add(x = var_2444_cast_fp16, y = var_2450_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_51_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; + tensor var_2461_to_fp16 = const()[name = tensor("op_2461_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2461_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor input_99_gamma_0_to_fp16 = const()[name = tensor("input_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174895744)))]; + tensor input_99_beta_0_to_fp16 = const()[name = tensor("input_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174898368)))]; + tensor input_99_epsilon_0_to_fp16 = const()[name = tensor("input_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_99_cast_fp16 = batch_norm(beta = input_99_beta_0_to_fp16, epsilon = input_99_epsilon_0_to_fp16, gamma = input_99_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_2479_pad_type_0 = const()[name = tensor("op_2479_pad_type_0"), val = tensor("valid")]; + tensor var_2479_strides_0 = const()[name = tensor("op_2479_strides_0"), val = tensor([1, 1])]; + tensor var_2479_pad_0 = const()[name = tensor("op_2479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2479_dilations_0 = const()[name = tensor("op_2479_dilations_0"), val = tensor([1, 1])]; + tensor var_2479_groups_0 = const()[name = tensor("op_2479_groups_0"), val = tensor(1)]; + tensor layers_12_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174900992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178177856))), name = tensor("layers_12_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_12_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178177984)))]; + tensor var_2479_cast_fp16 = conv(bias = layers_12_fc1_inlier_module_bias_to_fp16, dilations = var_2479_dilations_0, groups = var_2479_groups_0, pad = var_2479_pad_0, pad_type = var_2479_pad_type_0, strides = var_2479_strides_0, weight = layers_12_fc1_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = tensor("op_2479_cast_fp16")]; + tensor var_2485_pad_type_0 = const()[name = tensor("op_2485_pad_type_0"), val = tensor("valid")]; + tensor var_2485_strides_0 = const()[name = tensor("op_2485_strides_0"), val = tensor([1, 1])]; + tensor var_2485_pad_0 = const()[name = tensor("op_2485_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2485_dilations_0 = const()[name = tensor("op_2485_dilations_0"), val = tensor([1, 1])]; + tensor var_2485_groups_0 = const()[name = tensor("op_2485_groups_0"), val = tensor(1)]; + tensor layers_12_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178232512))), name = tensor("layers_12_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178188288))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_2485_cast_fp16 = conv(dilations = var_2485_dilations_0, groups = var_2485_groups_0, pad = var_2485_pad_0, pad_type = var_2485_pad_type_0, strides = var_2485_strides_0, weight = layers_12_fc1_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = tensor("op_2485_cast_fp16")]; + tensor input_101_cast_fp16 = add(x = var_2479_cast_fp16, y = var_2485_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor input_103_mode_0 = const()[name = tensor("input_103_mode_0"), val = tensor("EXACT")]; + tensor input_103_cast_fp16 = gelu(mode = input_103_mode_0, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_2496_pad_type_0 = const()[name = tensor("op_2496_pad_type_0"), val = tensor("valid")]; + tensor var_2496_strides_0 = const()[name = tensor("op_2496_strides_0"), val = tensor([1, 1])]; + tensor var_2496_pad_0 = const()[name = tensor("op_2496_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2496_dilations_0 = const()[name = tensor("op_2496_dilations_0"), val = tensor([1, 1])]; + tensor var_2496_groups_0 = const()[name = tensor("op_2496_groups_0"), val = tensor(1)]; + tensor layers_12_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179051776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182328640))), name = tensor("layers_12_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_12_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182328768)))]; + tensor var_2496_cast_fp16 = conv(bias = layers_12_fc2_inlier_module_bias_to_fp16, dilations = var_2496_dilations_0, groups = var_2496_groups_0, pad = var_2496_pad_0, pad_type = var_2496_pad_type_0, strides = var_2496_strides_0, weight = layers_12_fc2_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2502_pad_type_0 = const()[name = tensor("op_2502_pad_type_0"), val = tensor("valid")]; + tensor var_2502_strides_0 = const()[name = tensor("op_2502_strides_0"), val = tensor([1, 1])]; + tensor var_2502_pad_0 = const()[name = tensor("op_2502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2502_dilations_0 = const()[name = tensor("op_2502_dilations_0"), val = tensor([1, 1])]; + tensor var_2502_groups_0 = const()[name = tensor("op_2502_groups_0"), val = tensor(1)]; + tensor layers_12_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182509056))), name = tensor("layers_12_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182331392))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_2502_cast_fp16 = conv(dilations = var_2502_dilations_0, groups = var_2502_groups_0, pad = var_2502_pad_0, pad_type = var_2502_pad_type_0, strides = var_2502_strides_0, weight = layers_12_fc2_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = tensor("op_2502_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = var_2496_cast_fp16, y = var_2502_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = hidden_states_29_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor var_2512 = const()[name = tensor("op_2512"), val = tensor(3)]; + tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2531_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor obj_53_gamma_0_to_fp16 = const()[name = tensor("obj_53_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183328320)))]; + tensor obj_53_beta_0_to_fp16 = const()[name = tensor("obj_53_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183330944)))]; + tensor obj_53_epsilon_0_to_fp16 = const()[name = tensor("obj_53_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_53_cast_fp16 = batch_norm(beta = obj_53_beta_0_to_fp16, epsilon = obj_53_epsilon_0_to_fp16, gamma = obj_53_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor var_2553_pad_type_0 = const()[name = tensor("op_2553_pad_type_0"), val = tensor("valid")]; + tensor var_2553_strides_0 = const()[name = tensor("op_2553_strides_0"), val = tensor([1, 1])]; + tensor var_2553_pad_0 = const()[name = tensor("op_2553_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2553_dilations_0 = const()[name = tensor("op_2553_dilations_0"), val = tensor([1, 1])]; + tensor var_2553_groups_0 = const()[name = tensor("op_2553_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183333568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184152832))), name = tensor("layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_13_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184152960)))]; + tensor var_2553_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2553_dilations_0, groups = var_2553_groups_0, pad = var_2553_pad_0, pad_type = var_2553_pad_type_0, strides = var_2553_strides_0, weight = layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_2553_cast_fp16")]; + tensor var_2559_pad_type_0 = const()[name = tensor("op_2559_pad_type_0"), val = tensor("valid")]; + tensor var_2559_strides_0 = const()[name = tensor("op_2559_strides_0"), val = tensor([1, 1])]; + tensor var_2559_pad_0 = const()[name = tensor("op_2559_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2559_dilations_0 = const()[name = tensor("op_2559_dilations_0"), val = tensor([1, 1])]; + tensor var_2559_groups_0 = const()[name = tensor("op_2559_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184209280))), name = tensor("layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184155584))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2559_cast_fp16 = conv(dilations = var_2559_dilations_0, groups = var_2559_groups_0, pad = var_2559_pad_0, pad_type = var_2559_pad_type_0, strides = var_2559_strides_0, weight = layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_2559_cast_fp16")]; + tensor query_27_cast_fp16 = add(x = var_2553_cast_fp16, y = var_2559_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_2568_pad_type_0 = const()[name = tensor("op_2568_pad_type_0"), val = tensor("valid")]; + tensor var_2568_strides_0 = const()[name = tensor("op_2568_strides_0"), val = tensor([1, 1])]; + tensor var_2568_pad_0 = const()[name = tensor("op_2568_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2568_dilations_0 = const()[name = tensor("op_2568_dilations_0"), val = tensor([1, 1])]; + tensor var_2568_groups_0 = const()[name = tensor("op_2568_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184414144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185233408))), name = tensor("layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2568_cast_fp16 = conv(dilations = var_2568_dilations_0, groups = var_2568_groups_0, pad = var_2568_pad_0, pad_type = var_2568_pad_type_0, strides = var_2568_strides_0, weight = layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_2568_cast_fp16")]; + tensor var_2574_pad_type_0 = const()[name = tensor("op_2574_pad_type_0"), val = tensor("valid")]; + tensor var_2574_strides_0 = const()[name = tensor("op_2574_strides_0"), val = tensor([1, 1])]; + tensor var_2574_pad_0 = const()[name = tensor("op_2574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2574_dilations_0 = const()[name = tensor("op_2574_dilations_0"), val = tensor([1, 1])]; + tensor var_2574_groups_0 = const()[name = tensor("op_2574_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185265728))), name = tensor("layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185233536))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2574_cast_fp16 = conv(dilations = var_2574_dilations_0, groups = var_2574_groups_0, pad = var_2574_pad_0, pad_type = var_2574_pad_type_0, strides = var_2574_strides_0, weight = layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_2574_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_2568_cast_fp16, y = var_2574_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor var_2584_pad_type_0 = const()[name = tensor("op_2584_pad_type_0"), val = tensor("valid")]; + tensor var_2584_strides_0 = const()[name = tensor("op_2584_strides_0"), val = tensor([1, 1])]; + tensor var_2584_pad_0 = const()[name = tensor("op_2584_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2584_dilations_0 = const()[name = tensor("op_2584_dilations_0"), val = tensor([1, 1])]; + tensor var_2584_groups_0 = const()[name = tensor("op_2584_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185470592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186289856))), name = tensor("layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_13_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186289984)))]; + tensor var_2584_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2584_dilations_0, groups = var_2584_groups_0, pad = var_2584_pad_0, pad_type = var_2584_pad_type_0, strides = var_2584_strides_0, weight = layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2590_pad_type_0 = const()[name = tensor("op_2590_pad_type_0"), val = tensor("valid")]; + tensor var_2590_strides_0 = const()[name = tensor("op_2590_strides_0"), val = tensor([1, 1])]; + tensor var_2590_pad_0 = const()[name = tensor("op_2590_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2590_dilations_0 = const()[name = tensor("op_2590_dilations_0"), val = tensor([1, 1])]; + tensor var_2590_groups_0 = const()[name = tensor("op_2590_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186311936))), name = tensor("layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186292608))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2590_cast_fp16 = conv(dilations = var_2590_dilations_0, groups = var_2590_groups_0, pad = var_2590_pad_0, pad_type = var_2590_pad_type_0, strides = var_2590_strides_0, weight = layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor value_27_cast_fp16 = add(x = var_2584_cast_fp16, y = var_2590_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_2593 = const()[name = tensor("op_2593"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_2593, x = query_27_cast_fp16)[name = tensor("mh_q_27_cast_fp16")]; + tensor var_2595_to_fp16 = const()[name = tensor("op_2595_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2596_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_2595_to_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597 = const()[name = tensor("op_2597"), val = tensor([1, 20, 64, -1])]; + tensor var_2598_cast_fp16 = reshape(shape = var_2597, x = key_27_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor mh_w_27_transpose_x_0 = const()[name = tensor("mh_w_27_transpose_x_0"), val = tensor(true)]; + tensor mh_w_27_transpose_y_0 = const()[name = tensor("mh_w_27_transpose_y_0"), val = tensor(false)]; + tensor mh_w_27_cast_fp16 = matmul(transpose_x = mh_w_27_transpose_x_0, transpose_y = mh_w_27_transpose_y_0, x = var_2596_cast_fp16, y = var_2598_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_2512, x = mh_w_27_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602 = const()[name = tensor("op_2602"), val = tensor([1, 20, 64, -1])]; + tensor var_2603_cast_fp16 = reshape(shape = var_2602, x = value_27_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_2603_cast_fp16, y = var_2601_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_2606 = const()[name = tensor("op_2606"), val = tensor([1, 1280, 1, -1])]; + tensor input_105_cast_fp16 = reshape(shape = var_2606, x = attn_27_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_2616_pad_type_0 = const()[name = tensor("op_2616_pad_type_0"), val = tensor("valid")]; + tensor var_2616_strides_0 = const()[name = tensor("op_2616_strides_0"), val = tensor([1, 1])]; + tensor var_2616_pad_0 = const()[name = tensor("op_2616_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2616_dilations_0 = const()[name = tensor("op_2616_dilations_0"), val = tensor([1, 1])]; + tensor var_2616_groups_0 = const()[name = tensor("op_2616_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186516800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187336064))), name = tensor("layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_13_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187336192)))]; + tensor var_2616_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2616_dilations_0, groups = var_2616_groups_0, pad = var_2616_pad_0, pad_type = var_2616_pad_type_0, strides = var_2616_strides_0, weight = layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = tensor("op_2616_cast_fp16")]; + tensor var_2622_pad_type_0 = const()[name = tensor("op_2622_pad_type_0"), val = tensor("valid")]; + tensor var_2622_strides_0 = const()[name = tensor("op_2622_strides_0"), val = tensor([1, 1])]; + tensor var_2622_pad_0 = const()[name = tensor("op_2622_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2622_dilations_0 = const()[name = tensor("op_2622_dilations_0"), val = tensor([1, 1])]; + tensor var_2622_groups_0 = const()[name = tensor("op_2622_groups_0"), val = tensor(1)]; + tensor layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187358784))), name = tensor("layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187338816))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2622_cast_fp16 = conv(dilations = var_2622_dilations_0, groups = var_2622_groups_0, pad = var_2622_pad_0, pad_type = var_2622_pad_type_0, strides = var_2622_strides_0, weight = layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = tensor("op_2622_cast_fp16")]; + tensor obj_55_cast_fp16 = add(x = var_2616_cast_fp16, y = var_2622_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = obj_55_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; + tensor var_2633_to_fp16 = const()[name = tensor("op_2633_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2633_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor input_107_gamma_0_to_fp16 = const()[name = tensor("input_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187563648)))]; + tensor input_107_beta_0_to_fp16 = const()[name = tensor("input_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187566272)))]; + tensor input_107_epsilon_0_to_fp16 = const()[name = tensor("input_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_107_cast_fp16 = batch_norm(beta = input_107_beta_0_to_fp16, epsilon = input_107_epsilon_0_to_fp16, gamma = input_107_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor var_2651_pad_type_0 = const()[name = tensor("op_2651_pad_type_0"), val = tensor("valid")]; + tensor var_2651_strides_0 = const()[name = tensor("op_2651_strides_0"), val = tensor([1, 1])]; + tensor var_2651_pad_0 = const()[name = tensor("op_2651_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2651_dilations_0 = const()[name = tensor("op_2651_dilations_0"), val = tensor([1, 1])]; + tensor var_2651_groups_0 = const()[name = tensor("op_2651_groups_0"), val = tensor(1)]; + tensor layers_13_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187568896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190845760))), name = tensor("layers_13_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_13_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190845888)))]; + tensor var_2651_cast_fp16 = conv(bias = layers_13_fc1_inlier_module_bias_to_fp16, dilations = var_2651_dilations_0, groups = var_2651_groups_0, pad = var_2651_pad_0, pad_type = var_2651_pad_type_0, strides = var_2651_strides_0, weight = layers_13_fc1_inlier_module_weight_to_fp16_palettized, x = input_107_cast_fp16)[name = tensor("op_2651_cast_fp16")]; + tensor var_2657_pad_type_0 = const()[name = tensor("op_2657_pad_type_0"), val = tensor("valid")]; + tensor var_2657_strides_0 = const()[name = tensor("op_2657_strides_0"), val = tensor([1, 1])]; + tensor var_2657_pad_0 = const()[name = tensor("op_2657_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2657_dilations_0 = const()[name = tensor("op_2657_dilations_0"), val = tensor([1, 1])]; + tensor var_2657_groups_0 = const()[name = tensor("op_2657_groups_0"), val = tensor(1)]; + tensor layers_13_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190903232))), name = tensor("layers_13_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190856192))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_2657_cast_fp16 = conv(dilations = var_2657_dilations_0, groups = var_2657_groups_0, pad = var_2657_pad_0, pad_type = var_2657_pad_type_0, strides = var_2657_strides_0, weight = layers_13_fc1_outlier_module_weight_to_fp16_sparsified, x = input_107_cast_fp16)[name = tensor("op_2657_cast_fp16")]; + tensor input_109_cast_fp16 = add(x = var_2651_cast_fp16, y = var_2657_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor input_111_mode_0 = const()[name = tensor("input_111_mode_0"), val = tensor("EXACT")]; + tensor input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_2668_pad_type_0 = const()[name = tensor("op_2668_pad_type_0"), val = tensor("valid")]; + tensor var_2668_strides_0 = const()[name = tensor("op_2668_strides_0"), val = tensor([1, 1])]; + tensor var_2668_pad_0 = const()[name = tensor("op_2668_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2668_dilations_0 = const()[name = tensor("op_2668_dilations_0"), val = tensor([1, 1])]; + tensor var_2668_groups_0 = const()[name = tensor("op_2668_groups_0"), val = tensor(1)]; + tensor layers_13_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191722496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194999360))), name = tensor("layers_13_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_13_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194999488)))]; + tensor var_2668_cast_fp16 = conv(bias = layers_13_fc2_inlier_module_bias_to_fp16, dilations = var_2668_dilations_0, groups = var_2668_groups_0, pad = var_2668_pad_0, pad_type = var_2668_pad_type_0, strides = var_2668_strides_0, weight = layers_13_fc2_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = tensor("op_2668_cast_fp16")]; + tensor var_2674_pad_type_0 = const()[name = tensor("op_2674_pad_type_0"), val = tensor("valid")]; + tensor var_2674_strides_0 = const()[name = tensor("op_2674_strides_0"), val = tensor([1, 1])]; + tensor var_2674_pad_0 = const()[name = tensor("op_2674_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2674_dilations_0 = const()[name = tensor("op_2674_dilations_0"), val = tensor([1, 1])]; + tensor var_2674_groups_0 = const()[name = tensor("op_2674_groups_0"), val = tensor(1)]; + tensor layers_13_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195176960))), name = tensor("layers_13_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195002112))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_2674_cast_fp16 = conv(dilations = var_2674_dilations_0, groups = var_2674_groups_0, pad = var_2674_pad_0, pad_type = var_2674_pad_type_0, strides = var_2674_strides_0, weight = layers_13_fc2_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = tensor("op_2674_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = var_2668_cast_fp16, y = var_2674_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor var_2684 = const()[name = tensor("op_2684"), val = tensor(3)]; + tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; + tensor var_2703_to_fp16 = const()[name = tensor("op_2703_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2703_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195996224)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195998848)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_2725_pad_type_0 = const()[name = tensor("op_2725_pad_type_0"), val = tensor("valid")]; + tensor var_2725_strides_0 = const()[name = tensor("op_2725_strides_0"), val = tensor([1, 1])]; + tensor var_2725_pad_0 = const()[name = tensor("op_2725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2725_dilations_0 = const()[name = tensor("op_2725_dilations_0"), val = tensor([1, 1])]; + tensor var_2725_groups_0 = const()[name = tensor("op_2725_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196001472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196820736))), name = tensor("layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_14_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196820864)))]; + tensor var_2725_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2725_dilations_0, groups = var_2725_groups_0, pad = var_2725_pad_0, pad_type = var_2725_pad_type_0, strides = var_2725_strides_0, weight = layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_2725_cast_fp16")]; + tensor var_2731_pad_type_0 = const()[name = tensor("op_2731_pad_type_0"), val = tensor("valid")]; + tensor var_2731_strides_0 = const()[name = tensor("op_2731_strides_0"), val = tensor([1, 1])]; + tensor var_2731_pad_0 = const()[name = tensor("op_2731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2731_dilations_0 = const()[name = tensor("op_2731_dilations_0"), val = tensor([1, 1])]; + tensor var_2731_groups_0 = const()[name = tensor("op_2731_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196868352))), name = tensor("layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196823488))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2731_cast_fp16 = conv(dilations = var_2731_dilations_0, groups = var_2731_groups_0, pad = var_2731_pad_0, pad_type = var_2731_pad_type_0, strides = var_2731_strides_0, weight = layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_2731_cast_fp16")]; + tensor query_29_cast_fp16 = add(x = var_2725_cast_fp16, y = var_2731_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_2740_pad_type_0 = const()[name = tensor("op_2740_pad_type_0"), val = tensor("valid")]; + tensor var_2740_strides_0 = const()[name = tensor("op_2740_strides_0"), val = tensor([1, 1])]; + tensor var_2740_pad_0 = const()[name = tensor("op_2740_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2740_dilations_0 = const()[name = tensor("op_2740_dilations_0"), val = tensor([1, 1])]; + tensor var_2740_groups_0 = const()[name = tensor("op_2740_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197073216))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197892480))), name = tensor("layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2740_cast_fp16 = conv(dilations = var_2740_dilations_0, groups = var_2740_groups_0, pad = var_2740_pad_0, pad_type = var_2740_pad_type_0, strides = var_2740_strides_0, weight = layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_2740_cast_fp16")]; + tensor var_2746_pad_type_0 = const()[name = tensor("op_2746_pad_type_0"), val = tensor("valid")]; + tensor var_2746_strides_0 = const()[name = tensor("op_2746_strides_0"), val = tensor([1, 1])]; + tensor var_2746_pad_0 = const()[name = tensor("op_2746_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2746_dilations_0 = const()[name = tensor("op_2746_dilations_0"), val = tensor([1, 1])]; + tensor var_2746_groups_0 = const()[name = tensor("op_2746_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197921344))), name = tensor("layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197892608))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2746_cast_fp16 = conv(dilations = var_2746_dilations_0, groups = var_2746_groups_0, pad = var_2746_pad_0, pad_type = var_2746_pad_type_0, strides = var_2746_strides_0, weight = layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_2746_cast_fp16")]; + tensor key_29_cast_fp16 = add(x = var_2740_cast_fp16, y = var_2746_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_2756_pad_type_0 = const()[name = tensor("op_2756_pad_type_0"), val = tensor("valid")]; + tensor var_2756_strides_0 = const()[name = tensor("op_2756_strides_0"), val = tensor([1, 1])]; + tensor var_2756_pad_0 = const()[name = tensor("op_2756_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2756_dilations_0 = const()[name = tensor("op_2756_dilations_0"), val = tensor([1, 1])]; + tensor var_2756_groups_0 = const()[name = tensor("op_2756_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198126208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198945472))), name = tensor("layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_14_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198945600)))]; + tensor var_2756_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2756_dilations_0, groups = var_2756_groups_0, pad = var_2756_pad_0, pad_type = var_2756_pad_type_0, strides = var_2756_strides_0, weight = layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_2756_cast_fp16")]; + tensor var_2762_pad_type_0 = const()[name = tensor("op_2762_pad_type_0"), val = tensor("valid")]; + tensor var_2762_strides_0 = const()[name = tensor("op_2762_strides_0"), val = tensor([1, 1])]; + tensor var_2762_pad_0 = const()[name = tensor("op_2762_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2762_dilations_0 = const()[name = tensor("op_2762_dilations_0"), val = tensor([1, 1])]; + tensor var_2762_groups_0 = const()[name = tensor("op_2762_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198963968))), name = tensor("layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198948224))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2762_cast_fp16 = conv(dilations = var_2762_dilations_0, groups = var_2762_groups_0, pad = var_2762_pad_0, pad_type = var_2762_pad_type_0, strides = var_2762_strides_0, weight = layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_2762_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_2756_cast_fp16, y = var_2762_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_2765 = const()[name = tensor("op_2765"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_2765, x = query_29_cast_fp16)[name = tensor("mh_q_29_cast_fp16")]; + tensor var_2767_to_fp16 = const()[name = tensor("op_2767_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2768_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2767_to_fp16)[name = tensor("op_2768_cast_fp16")]; + tensor var_2769 = const()[name = tensor("op_2769"), val = tensor([1, 20, 64, -1])]; + tensor var_2770_cast_fp16 = reshape(shape = var_2769, x = key_29_cast_fp16)[name = tensor("op_2770_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_2768_cast_fp16, y = var_2770_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor var_2773_cast_fp16 = softmax(axis = var_2684, x = mh_w_29_cast_fp16)[name = tensor("op_2773_cast_fp16")]; + tensor var_2774 = const()[name = tensor("op_2774"), val = tensor([1, 20, 64, -1])]; + tensor var_2775_cast_fp16 = reshape(shape = var_2774, x = value_29_cast_fp16)[name = tensor("op_2775_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2775_cast_fp16, y = var_2773_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_2778 = const()[name = tensor("op_2778"), val = tensor([1, 1280, 1, -1])]; + tensor input_113_cast_fp16 = reshape(shape = var_2778, x = attn_29_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_2788_pad_type_0 = const()[name = tensor("op_2788_pad_type_0"), val = tensor("valid")]; + tensor var_2788_strides_0 = const()[name = tensor("op_2788_strides_0"), val = tensor([1, 1])]; + tensor var_2788_pad_0 = const()[name = tensor("op_2788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2788_dilations_0 = const()[name = tensor("op_2788_dilations_0"), val = tensor([1, 1])]; + tensor var_2788_groups_0 = const()[name = tensor("op_2788_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199168832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199988096))), name = tensor("layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_14_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199988224)))]; + tensor var_2788_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2788_dilations_0, groups = var_2788_groups_0, pad = var_2788_pad_0, pad_type = var_2788_pad_type_0, strides = var_2788_strides_0, weight = layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("op_2788_cast_fp16")]; + tensor var_2794_pad_type_0 = const()[name = tensor("op_2794_pad_type_0"), val = tensor("valid")]; + tensor var_2794_strides_0 = const()[name = tensor("op_2794_strides_0"), val = tensor([1, 1])]; + tensor var_2794_pad_0 = const()[name = tensor("op_2794_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2794_dilations_0 = const()[name = tensor("op_2794_dilations_0"), val = tensor([1, 1])]; + tensor var_2794_groups_0 = const()[name = tensor("op_2794_groups_0"), val = tensor(1)]; + tensor layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200005632))), name = tensor("layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199990848))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2794_cast_fp16 = conv(dilations = var_2794_dilations_0, groups = var_2794_groups_0, pad = var_2794_pad_0, pad_type = var_2794_pad_type_0, strides = var_2794_strides_0, weight = layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = tensor("op_2794_cast_fp16")]; + tensor obj_59_cast_fp16 = add(x = var_2788_cast_fp16, y = var_2794_cast_fp16)[name = tensor("obj_59_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_59_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; + tensor var_2805_to_fp16 = const()[name = tensor("op_2805_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2805_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200210496)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200213120)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_2823_pad_type_0 = const()[name = tensor("op_2823_pad_type_0"), val = tensor("valid")]; + tensor var_2823_strides_0 = const()[name = tensor("op_2823_strides_0"), val = tensor([1, 1])]; + tensor var_2823_pad_0 = const()[name = tensor("op_2823_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2823_dilations_0 = const()[name = tensor("op_2823_dilations_0"), val = tensor([1, 1])]; + tensor var_2823_groups_0 = const()[name = tensor("op_2823_groups_0"), val = tensor(1)]; + tensor layers_14_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200215744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203492608))), name = tensor("layers_14_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_14_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203492736)))]; + tensor var_2823_cast_fp16 = conv(bias = layers_14_fc1_inlier_module_bias_to_fp16, dilations = var_2823_dilations_0, groups = var_2823_groups_0, pad = var_2823_pad_0, pad_type = var_2823_pad_type_0, strides = var_2823_strides_0, weight = layers_14_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = tensor("op_2823_cast_fp16")]; + tensor var_2829_pad_type_0 = const()[name = tensor("op_2829_pad_type_0"), val = tensor("valid")]; + tensor var_2829_strides_0 = const()[name = tensor("op_2829_strides_0"), val = tensor([1, 1])]; + tensor var_2829_pad_0 = const()[name = tensor("op_2829_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2829_dilations_0 = const()[name = tensor("op_2829_dilations_0"), val = tensor([1, 1])]; + tensor var_2829_groups_0 = const()[name = tensor("op_2829_groups_0"), val = tensor(1)]; + tensor layers_14_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203552448))), name = tensor("layers_14_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203503040))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_2829_cast_fp16 = conv(dilations = var_2829_dilations_0, groups = var_2829_groups_0, pad = var_2829_pad_0, pad_type = var_2829_pad_type_0, strides = var_2829_strides_0, weight = layers_14_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = tensor("op_2829_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = var_2823_cast_fp16, y = var_2829_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_119_mode_0 = const()[name = tensor("input_119_mode_0"), val = tensor("EXACT")]; + tensor input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor var_2840_pad_type_0 = const()[name = tensor("op_2840_pad_type_0"), val = tensor("valid")]; + tensor var_2840_strides_0 = const()[name = tensor("op_2840_strides_0"), val = tensor([1, 1])]; + tensor var_2840_pad_0 = const()[name = tensor("op_2840_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2840_dilations_0 = const()[name = tensor("op_2840_dilations_0"), val = tensor([1, 1])]; + tensor var_2840_groups_0 = const()[name = tensor("op_2840_groups_0"), val = tensor(1)]; + tensor layers_14_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204371712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207648576))), name = tensor("layers_14_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_14_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207648704)))]; + tensor var_2840_cast_fp16 = conv(bias = layers_14_fc2_inlier_module_bias_to_fp16, dilations = var_2840_dilations_0, groups = var_2840_groups_0, pad = var_2840_pad_0, pad_type = var_2840_pad_type_0, strides = var_2840_strides_0, weight = layers_14_fc2_inlier_module_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = tensor("op_2840_cast_fp16")]; + tensor var_2846_pad_type_0 = const()[name = tensor("op_2846_pad_type_0"), val = tensor("valid")]; + tensor var_2846_strides_0 = const()[name = tensor("op_2846_strides_0"), val = tensor([1, 1])]; + tensor var_2846_pad_0 = const()[name = tensor("op_2846_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2846_dilations_0 = const()[name = tensor("op_2846_dilations_0"), val = tensor([1, 1])]; + tensor var_2846_groups_0 = const()[name = tensor("op_2846_groups_0"), val = tensor(1)]; + tensor layers_14_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207800128))), name = tensor("layers_14_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207651328))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_2846_cast_fp16 = conv(dilations = var_2846_dilations_0, groups = var_2846_groups_0, pad = var_2846_pad_0, pad_type = var_2846_pad_type_0, strides = var_2846_strides_0, weight = layers_14_fc2_outlier_module_weight_to_fp16_sparsified, x = input_119_cast_fp16)[name = tensor("op_2846_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = add(x = var_2840_cast_fp16, y = var_2846_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_33_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_2856 = const()[name = tensor("op_2856"), val = tensor(3)]; + tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; + tensor var_2875_to_fp16 = const()[name = tensor("op_2875_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2875_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_61_gamma_0_to_fp16 = const()[name = tensor("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208619392)))]; + tensor obj_61_beta_0_to_fp16 = const()[name = tensor("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208622016)))]; + tensor obj_61_epsilon_0_to_fp16 = const()[name = tensor("obj_61_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_61_cast_fp16")]; + tensor var_2897_pad_type_0 = const()[name = tensor("op_2897_pad_type_0"), val = tensor("valid")]; + tensor var_2897_strides_0 = const()[name = tensor("op_2897_strides_0"), val = tensor([1, 1])]; + tensor var_2897_pad_0 = const()[name = tensor("op_2897_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2897_dilations_0 = const()[name = tensor("op_2897_dilations_0"), val = tensor([1, 1])]; + tensor var_2897_groups_0 = const()[name = tensor("op_2897_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208624640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209443904))), name = tensor("layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_15_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209444032)))]; + tensor var_2897_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2897_dilations_0, groups = var_2897_groups_0, pad = var_2897_pad_0, pad_type = var_2897_pad_type_0, strides = var_2897_strides_0, weight = layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_2897_cast_fp16")]; + tensor var_2903_pad_type_0 = const()[name = tensor("op_2903_pad_type_0"), val = tensor("valid")]; + tensor var_2903_strides_0 = const()[name = tensor("op_2903_strides_0"), val = tensor([1, 1])]; + tensor var_2903_pad_0 = const()[name = tensor("op_2903_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2903_dilations_0 = const()[name = tensor("op_2903_dilations_0"), val = tensor([1, 1])]; + tensor var_2903_groups_0 = const()[name = tensor("op_2903_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209492416))), name = tensor("layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209446656))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2903_cast_fp16 = conv(dilations = var_2903_dilations_0, groups = var_2903_groups_0, pad = var_2903_pad_0, pad_type = var_2903_pad_type_0, strides = var_2903_strides_0, weight = layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_2903_cast_fp16")]; + tensor query_31_cast_fp16 = add(x = var_2897_cast_fp16, y = var_2903_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_2912_pad_type_0 = const()[name = tensor("op_2912_pad_type_0"), val = tensor("valid")]; + tensor var_2912_strides_0 = const()[name = tensor("op_2912_strides_0"), val = tensor([1, 1])]; + tensor var_2912_pad_0 = const()[name = tensor("op_2912_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2912_dilations_0 = const()[name = tensor("op_2912_dilations_0"), val = tensor([1, 1])]; + tensor var_2912_groups_0 = const()[name = tensor("op_2912_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209697280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210516544))), name = tensor("layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2912_cast_fp16 = conv(dilations = var_2912_dilations_0, groups = var_2912_groups_0, pad = var_2912_pad_0, pad_type = var_2912_pad_type_0, strides = var_2912_strides_0, weight = layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_2912_cast_fp16")]; + tensor var_2918_pad_type_0 = const()[name = tensor("op_2918_pad_type_0"), val = tensor("valid")]; + tensor var_2918_strides_0 = const()[name = tensor("op_2918_strides_0"), val = tensor([1, 1])]; + tensor var_2918_pad_0 = const()[name = tensor("op_2918_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2918_dilations_0 = const()[name = tensor("op_2918_dilations_0"), val = tensor([1, 1])]; + tensor var_2918_groups_0 = const()[name = tensor("op_2918_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210542144))), name = tensor("layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210516672))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2918_cast_fp16 = conv(dilations = var_2918_dilations_0, groups = var_2918_groups_0, pad = var_2918_pad_0, pad_type = var_2918_pad_type_0, strides = var_2918_strides_0, weight = layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_2918_cast_fp16")]; + tensor key_31_cast_fp16 = add(x = var_2912_cast_fp16, y = var_2918_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor var_2928_pad_type_0 = const()[name = tensor("op_2928_pad_type_0"), val = tensor("valid")]; + tensor var_2928_strides_0 = const()[name = tensor("op_2928_strides_0"), val = tensor([1, 1])]; + tensor var_2928_pad_0 = const()[name = tensor("op_2928_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2928_dilations_0 = const()[name = tensor("op_2928_dilations_0"), val = tensor([1, 1])]; + tensor var_2928_groups_0 = const()[name = tensor("op_2928_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210747008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211566272))), name = tensor("layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_15_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211566400)))]; + tensor var_2928_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2928_dilations_0, groups = var_2928_groups_0, pad = var_2928_pad_0, pad_type = var_2928_pad_type_0, strides = var_2928_strides_0, weight = layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_2928_cast_fp16")]; + tensor var_2934_pad_type_0 = const()[name = tensor("op_2934_pad_type_0"), val = tensor("valid")]; + tensor var_2934_strides_0 = const()[name = tensor("op_2934_strides_0"), val = tensor([1, 1])]; + tensor var_2934_pad_0 = const()[name = tensor("op_2934_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2934_dilations_0 = const()[name = tensor("op_2934_dilations_0"), val = tensor([1, 1])]; + tensor var_2934_groups_0 = const()[name = tensor("op_2934_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211586240))), name = tensor("layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211569024))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2934_cast_fp16 = conv(dilations = var_2934_dilations_0, groups = var_2934_groups_0, pad = var_2934_pad_0, pad_type = var_2934_pad_type_0, strides = var_2934_strides_0, weight = layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_2934_cast_fp16")]; + tensor value_31_cast_fp16 = add(x = var_2928_cast_fp16, y = var_2934_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_2937 = const()[name = tensor("op_2937"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2937, x = query_31_cast_fp16)[name = tensor("mh_q_31_cast_fp16")]; + tensor var_2939_to_fp16 = const()[name = tensor("op_2939_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2940_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2939_to_fp16)[name = tensor("op_2940_cast_fp16")]; + tensor var_2941 = const()[name = tensor("op_2941"), val = tensor([1, 20, 64, -1])]; + tensor var_2942_cast_fp16 = reshape(shape = var_2941, x = key_31_cast_fp16)[name = tensor("op_2942_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_2940_cast_fp16, y = var_2942_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor var_2945_cast_fp16 = softmax(axis = var_2856, x = mh_w_31_cast_fp16)[name = tensor("op_2945_cast_fp16")]; + tensor var_2946 = const()[name = tensor("op_2946"), val = tensor([1, 20, 64, -1])]; + tensor var_2947_cast_fp16 = reshape(shape = var_2946, x = value_31_cast_fp16)[name = tensor("op_2947_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2947_cast_fp16, y = var_2945_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_2950 = const()[name = tensor("op_2950"), val = tensor([1, 1280, 1, -1])]; + tensor input_121_cast_fp16 = reshape(shape = var_2950, x = attn_31_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor var_2960_pad_type_0 = const()[name = tensor("op_2960_pad_type_0"), val = tensor("valid")]; + tensor var_2960_strides_0 = const()[name = tensor("op_2960_strides_0"), val = tensor([1, 1])]; + tensor var_2960_pad_0 = const()[name = tensor("op_2960_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2960_dilations_0 = const()[name = tensor("op_2960_dilations_0"), val = tensor([1, 1])]; + tensor var_2960_groups_0 = const()[name = tensor("op_2960_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211791104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212610368))), name = tensor("layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_15_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212610496)))]; + tensor var_2960_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2960_dilations_0, groups = var_2960_groups_0, pad = var_2960_pad_0, pad_type = var_2960_pad_type_0, strides = var_2960_strides_0, weight = layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = tensor("op_2960_cast_fp16")]; + tensor var_2966_pad_type_0 = const()[name = tensor("op_2966_pad_type_0"), val = tensor("valid")]; + tensor var_2966_strides_0 = const()[name = tensor("op_2966_strides_0"), val = tensor([1, 1])]; + tensor var_2966_pad_0 = const()[name = tensor("op_2966_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2966_dilations_0 = const()[name = tensor("op_2966_dilations_0"), val = tensor([1, 1])]; + tensor var_2966_groups_0 = const()[name = tensor("op_2966_groups_0"), val = tensor(1)]; + tensor layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212633664))), name = tensor("layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212613120))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_2966_cast_fp16 = conv(dilations = var_2966_dilations_0, groups = var_2966_groups_0, pad = var_2966_pad_0, pad_type = var_2966_pad_type_0, strides = var_2966_strides_0, weight = layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_121_cast_fp16)[name = tensor("op_2966_cast_fp16")]; + tensor obj_63_cast_fp16 = add(x = var_2960_cast_fp16, y = var_2966_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; + tensor var_2977_to_fp16 = const()[name = tensor("op_2977_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_2977_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor input_123_gamma_0_to_fp16 = const()[name = tensor("input_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212838528)))]; + tensor input_123_beta_0_to_fp16 = const()[name = tensor("input_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212841152)))]; + tensor input_123_epsilon_0_to_fp16 = const()[name = tensor("input_123_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_123_cast_fp16 = batch_norm(beta = input_123_beta_0_to_fp16, epsilon = input_123_epsilon_0_to_fp16, gamma = input_123_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor var_2995_pad_type_0 = const()[name = tensor("op_2995_pad_type_0"), val = tensor("valid")]; + tensor var_2995_strides_0 = const()[name = tensor("op_2995_strides_0"), val = tensor([1, 1])]; + tensor var_2995_pad_0 = const()[name = tensor("op_2995_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2995_dilations_0 = const()[name = tensor("op_2995_dilations_0"), val = tensor([1, 1])]; + tensor var_2995_groups_0 = const()[name = tensor("op_2995_groups_0"), val = tensor(1)]; + tensor layers_15_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212843776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216120640))), name = tensor("layers_15_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_15_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216120768)))]; + tensor var_2995_cast_fp16 = conv(bias = layers_15_fc1_inlier_module_bias_to_fp16, dilations = var_2995_dilations_0, groups = var_2995_groups_0, pad = var_2995_pad_0, pad_type = var_2995_pad_type_0, strides = var_2995_strides_0, weight = layers_15_fc1_inlier_module_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = tensor("op_2995_cast_fp16")]; + tensor var_3001_pad_type_0 = const()[name = tensor("op_3001_pad_type_0"), val = tensor("valid")]; + tensor var_3001_strides_0 = const()[name = tensor("op_3001_strides_0"), val = tensor([1, 1])]; + tensor var_3001_pad_0 = const()[name = tensor("op_3001_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3001_dilations_0 = const()[name = tensor("op_3001_dilations_0"), val = tensor([1, 1])]; + tensor var_3001_groups_0 = const()[name = tensor("op_3001_groups_0"), val = tensor(1)]; + tensor layers_15_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216176896))), name = tensor("layers_15_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216131072))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3001_cast_fp16 = conv(dilations = var_3001_dilations_0, groups = var_3001_groups_0, pad = var_3001_pad_0, pad_type = var_3001_pad_type_0, strides = var_3001_strides_0, weight = layers_15_fc1_outlier_module_weight_to_fp16_sparsified, x = input_123_cast_fp16)[name = tensor("op_3001_cast_fp16")]; + tensor input_125_cast_fp16 = add(x = var_2995_cast_fp16, y = var_3001_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; + tensor input_127_cast_fp16 = gelu(mode = input_127_mode_0, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor var_3012_pad_type_0 = const()[name = tensor("op_3012_pad_type_0"), val = tensor("valid")]; + tensor var_3012_strides_0 = const()[name = tensor("op_3012_strides_0"), val = tensor([1, 1])]; + tensor var_3012_pad_0 = const()[name = tensor("op_3012_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3012_dilations_0 = const()[name = tensor("op_3012_dilations_0"), val = tensor([1, 1])]; + tensor var_3012_groups_0 = const()[name = tensor("op_3012_groups_0"), val = tensor(1)]; + tensor layers_15_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216996160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220273024))), name = tensor("layers_15_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_15_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220273152)))]; + tensor var_3012_cast_fp16 = conv(bias = layers_15_fc2_inlier_module_bias_to_fp16, dilations = var_3012_dilations_0, groups = var_3012_groups_0, pad = var_3012_pad_0, pad_type = var_3012_pad_type_0, strides = var_3012_strides_0, weight = layers_15_fc2_inlier_module_weight_to_fp16_palettized, x = input_127_cast_fp16)[name = tensor("op_3012_cast_fp16")]; + tensor var_3018_pad_type_0 = const()[name = tensor("op_3018_pad_type_0"), val = tensor("valid")]; + tensor var_3018_strides_0 = const()[name = tensor("op_3018_strides_0"), val = tensor([1, 1])]; + tensor var_3018_pad_0 = const()[name = tensor("op_3018_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3018_dilations_0 = const()[name = tensor("op_3018_dilations_0"), val = tensor([1, 1])]; + tensor var_3018_groups_0 = const()[name = tensor("op_3018_groups_0"), val = tensor(1)]; + tensor layers_15_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220449024))), name = tensor("layers_15_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220275776))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3018_cast_fp16 = conv(dilations = var_3018_dilations_0, groups = var_3018_groups_0, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3018_strides_0, weight = layers_15_fc2_outlier_module_weight_to_fp16_sparsified, x = input_127_cast_fp16)[name = tensor("op_3018_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = add(x = var_3012_cast_fp16, y = var_3018_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = hidden_states_35_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor var_3028 = const()[name = tensor("op_3028"), val = tensor(3)]; + tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; + tensor var_3047_to_fp16 = const()[name = tensor("op_3047_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3047_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221268288)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221270912)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_3069_pad_type_0 = const()[name = tensor("op_3069_pad_type_0"), val = tensor("valid")]; + tensor var_3069_strides_0 = const()[name = tensor("op_3069_strides_0"), val = tensor([1, 1])]; + tensor var_3069_pad_0 = const()[name = tensor("op_3069_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3069_dilations_0 = const()[name = tensor("op_3069_dilations_0"), val = tensor([1, 1])]; + tensor var_3069_groups_0 = const()[name = tensor("op_3069_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221273536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222092800))), name = tensor("layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_16_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222092928)))]; + tensor var_3069_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3069_dilations_0, groups = var_3069_groups_0, pad = var_3069_pad_0, pad_type = var_3069_pad_type_0, strides = var_3069_strides_0, weight = layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_3069_cast_fp16")]; + tensor var_3075_pad_type_0 = const()[name = tensor("op_3075_pad_type_0"), val = tensor("valid")]; + tensor var_3075_strides_0 = const()[name = tensor("op_3075_strides_0"), val = tensor([1, 1])]; + tensor var_3075_pad_0 = const()[name = tensor("op_3075_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3075_dilations_0 = const()[name = tensor("op_3075_dilations_0"), val = tensor([1, 1])]; + tensor var_3075_groups_0 = const()[name = tensor("op_3075_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222164416))), name = tensor("layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222095552))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3075_cast_fp16 = conv(dilations = var_3075_dilations_0, groups = var_3075_groups_0, pad = var_3075_pad_0, pad_type = var_3075_pad_type_0, strides = var_3075_strides_0, weight = layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_3075_cast_fp16")]; + tensor query_33_cast_fp16 = add(x = var_3069_cast_fp16, y = var_3075_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_3084_pad_type_0 = const()[name = tensor("op_3084_pad_type_0"), val = tensor("valid")]; + tensor var_3084_strides_0 = const()[name = tensor("op_3084_strides_0"), val = tensor([1, 1])]; + tensor var_3084_pad_0 = const()[name = tensor("op_3084_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3084_dilations_0 = const()[name = tensor("op_3084_dilations_0"), val = tensor([1, 1])]; + tensor var_3084_groups_0 = const()[name = tensor("op_3084_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222369280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223188544))), name = tensor("layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3084_cast_fp16 = conv(dilations = var_3084_dilations_0, groups = var_3084_groups_0, pad = var_3084_pad_0, pad_type = var_3084_pad_type_0, strides = var_3084_strides_0, weight = layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_3084_cast_fp16")]; + tensor var_3090_pad_type_0 = const()[name = tensor("op_3090_pad_type_0"), val = tensor("valid")]; + tensor var_3090_strides_0 = const()[name = tensor("op_3090_strides_0"), val = tensor([1, 1])]; + tensor var_3090_pad_0 = const()[name = tensor("op_3090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3090_dilations_0 = const()[name = tensor("op_3090_dilations_0"), val = tensor([1, 1])]; + tensor var_3090_groups_0 = const()[name = tensor("op_3090_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223217792))), name = tensor("layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223188672))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3090_cast_fp16 = conv(dilations = var_3090_dilations_0, groups = var_3090_groups_0, pad = var_3090_pad_0, pad_type = var_3090_pad_type_0, strides = var_3090_strides_0, weight = layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_3090_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_3084_cast_fp16, y = var_3090_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_3100_pad_type_0 = const()[name = tensor("op_3100_pad_type_0"), val = tensor("valid")]; + tensor var_3100_strides_0 = const()[name = tensor("op_3100_strides_0"), val = tensor([1, 1])]; + tensor var_3100_pad_0 = const()[name = tensor("op_3100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3100_dilations_0 = const()[name = tensor("op_3100_dilations_0"), val = tensor([1, 1])]; + tensor var_3100_groups_0 = const()[name = tensor("op_3100_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223422656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224241920))), name = tensor("layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_16_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224242048)))]; + tensor var_3100_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3100_dilations_0, groups = var_3100_groups_0, pad = var_3100_pad_0, pad_type = var_3100_pad_type_0, strides = var_3100_strides_0, weight = layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_3100_cast_fp16")]; + tensor var_3106_pad_type_0 = const()[name = tensor("op_3106_pad_type_0"), val = tensor("valid")]; + tensor var_3106_strides_0 = const()[name = tensor("op_3106_strides_0"), val = tensor([1, 1])]; + tensor var_3106_pad_0 = const()[name = tensor("op_3106_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3106_dilations_0 = const()[name = tensor("op_3106_dilations_0"), val = tensor([1, 1])]; + tensor var_3106_groups_0 = const()[name = tensor("op_3106_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224261888))), name = tensor("layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224244672))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3106_cast_fp16 = conv(dilations = var_3106_dilations_0, groups = var_3106_groups_0, pad = var_3106_pad_0, pad_type = var_3106_pad_type_0, strides = var_3106_strides_0, weight = layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_3106_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_3100_cast_fp16, y = var_3106_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_3109 = const()[name = tensor("op_3109"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_3109, x = query_33_cast_fp16)[name = tensor("mh_q_33_cast_fp16")]; + tensor var_3111_to_fp16 = const()[name = tensor("op_3111_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3112_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_3111_to_fp16)[name = tensor("op_3112_cast_fp16")]; + tensor var_3113 = const()[name = tensor("op_3113"), val = tensor([1, 20, 64, -1])]; + tensor var_3114_cast_fp16 = reshape(shape = var_3113, x = key_33_cast_fp16)[name = tensor("op_3114_cast_fp16")]; + tensor mh_w_33_transpose_x_0 = const()[name = tensor("mh_w_33_transpose_x_0"), val = tensor(true)]; + tensor mh_w_33_transpose_y_0 = const()[name = tensor("mh_w_33_transpose_y_0"), val = tensor(false)]; + tensor mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_3112_cast_fp16, y = var_3114_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_3117_cast_fp16 = softmax(axis = var_3028, x = mh_w_33_cast_fp16)[name = tensor("op_3117_cast_fp16")]; + tensor var_3118 = const()[name = tensor("op_3118"), val = tensor([1, 20, 64, -1])]; + tensor var_3119_cast_fp16 = reshape(shape = var_3118, x = value_33_cast_fp16)[name = tensor("op_3119_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_3119_cast_fp16, y = var_3117_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_3122 = const()[name = tensor("op_3122"), val = tensor([1, 1280, 1, -1])]; + tensor input_129_cast_fp16 = reshape(shape = var_3122, x = attn_33_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor var_3132_pad_type_0 = const()[name = tensor("op_3132_pad_type_0"), val = tensor("valid")]; + tensor var_3132_strides_0 = const()[name = tensor("op_3132_strides_0"), val = tensor([1, 1])]; + tensor var_3132_pad_0 = const()[name = tensor("op_3132_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3132_dilations_0 = const()[name = tensor("op_3132_dilations_0"), val = tensor([1, 1])]; + tensor var_3132_groups_0 = const()[name = tensor("op_3132_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224466752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225286016))), name = tensor("layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_16_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225286144)))]; + tensor var_3132_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3132_dilations_0, groups = var_3132_groups_0, pad = var_3132_pad_0, pad_type = var_3132_pad_type_0, strides = var_3132_strides_0, weight = layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = tensor("op_3132_cast_fp16")]; + tensor var_3138_pad_type_0 = const()[name = tensor("op_3138_pad_type_0"), val = tensor("valid")]; + tensor var_3138_strides_0 = const()[name = tensor("op_3138_strides_0"), val = tensor([1, 1])]; + tensor var_3138_pad_0 = const()[name = tensor("op_3138_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3138_dilations_0 = const()[name = tensor("op_3138_dilations_0"), val = tensor([1, 1])]; + tensor var_3138_groups_0 = const()[name = tensor("op_3138_groups_0"), val = tensor(1)]; + tensor layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225307136))), name = tensor("layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225288768))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3138_cast_fp16 = conv(dilations = var_3138_dilations_0, groups = var_3138_groups_0, pad = var_3138_pad_0, pad_type = var_3138_pad_type_0, strides = var_3138_strides_0, weight = layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_129_cast_fp16)[name = tensor("op_3138_cast_fp16")]; + tensor obj_67_cast_fp16 = add(x = var_3132_cast_fp16, y = var_3138_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; + tensor var_3149_to_fp16 = const()[name = tensor("op_3149_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3149_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor input_131_gamma_0_to_fp16 = const()[name = tensor("input_131_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225512000)))]; + tensor input_131_beta_0_to_fp16 = const()[name = tensor("input_131_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225514624)))]; + tensor input_131_epsilon_0_to_fp16 = const()[name = tensor("input_131_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_131_cast_fp16 = batch_norm(beta = input_131_beta_0_to_fp16, epsilon = input_131_epsilon_0_to_fp16, gamma = input_131_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor var_3167_pad_type_0 = const()[name = tensor("op_3167_pad_type_0"), val = tensor("valid")]; + tensor var_3167_strides_0 = const()[name = tensor("op_3167_strides_0"), val = tensor([1, 1])]; + tensor var_3167_pad_0 = const()[name = tensor("op_3167_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3167_dilations_0 = const()[name = tensor("op_3167_dilations_0"), val = tensor([1, 1])]; + tensor var_3167_groups_0 = const()[name = tensor("op_3167_groups_0"), val = tensor(1)]; + tensor layers_16_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225517248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228794112))), name = tensor("layers_16_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_16_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228794240)))]; + tensor var_3167_cast_fp16 = conv(bias = layers_16_fc1_inlier_module_bias_to_fp16, dilations = var_3167_dilations_0, groups = var_3167_groups_0, pad = var_3167_pad_0, pad_type = var_3167_pad_type_0, strides = var_3167_strides_0, weight = layers_16_fc1_inlier_module_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = tensor("op_3167_cast_fp16")]; + tensor var_3173_pad_type_0 = const()[name = tensor("op_3173_pad_type_0"), val = tensor("valid")]; + tensor var_3173_strides_0 = const()[name = tensor("op_3173_strides_0"), val = tensor([1, 1])]; + tensor var_3173_pad_0 = const()[name = tensor("op_3173_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3173_dilations_0 = const()[name = tensor("op_3173_dilations_0"), val = tensor([1, 1])]; + tensor var_3173_groups_0 = const()[name = tensor("op_3173_groups_0"), val = tensor(1)]; + tensor layers_16_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228879488))), name = tensor("layers_16_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228804544))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3173_cast_fp16 = conv(dilations = var_3173_dilations_0, groups = var_3173_groups_0, pad = var_3173_pad_0, pad_type = var_3173_pad_type_0, strides = var_3173_strides_0, weight = layers_16_fc1_outlier_module_weight_to_fp16_sparsified, x = input_131_cast_fp16)[name = tensor("op_3173_cast_fp16")]; + tensor input_133_cast_fp16 = add(x = var_3167_cast_fp16, y = var_3173_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor input_135_mode_0 = const()[name = tensor("input_135_mode_0"), val = tensor("EXACT")]; + tensor input_135_cast_fp16 = gelu(mode = input_135_mode_0, x = input_133_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor var_3184_pad_type_0 = const()[name = tensor("op_3184_pad_type_0"), val = tensor("valid")]; + tensor var_3184_strides_0 = const()[name = tensor("op_3184_strides_0"), val = tensor([1, 1])]; + tensor var_3184_pad_0 = const()[name = tensor("op_3184_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3184_dilations_0 = const()[name = tensor("op_3184_dilations_0"), val = tensor([1, 1])]; + tensor var_3184_groups_0 = const()[name = tensor("op_3184_groups_0"), val = tensor(1)]; + tensor layers_16_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229698752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232975616))), name = tensor("layers_16_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_16_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232975744)))]; + tensor var_3184_cast_fp16 = conv(bias = layers_16_fc2_inlier_module_bias_to_fp16, dilations = var_3184_dilations_0, groups = var_3184_groups_0, pad = var_3184_pad_0, pad_type = var_3184_pad_type_0, strides = var_3184_strides_0, weight = layers_16_fc2_inlier_module_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = tensor("op_3184_cast_fp16")]; + tensor var_3190_pad_type_0 = const()[name = tensor("op_3190_pad_type_0"), val = tensor("valid")]; + tensor var_3190_strides_0 = const()[name = tensor("op_3190_strides_0"), val = tensor([1, 1])]; + tensor var_3190_pad_0 = const()[name = tensor("op_3190_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3190_dilations_0 = const()[name = tensor("op_3190_dilations_0"), val = tensor([1, 1])]; + tensor var_3190_groups_0 = const()[name = tensor("op_3190_groups_0"), val = tensor(1)]; + tensor layers_16_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233104448))), name = tensor("layers_16_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232978368))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3190_cast_fp16 = conv(dilations = var_3190_dilations_0, groups = var_3190_groups_0, pad = var_3190_pad_0, pad_type = var_3190_pad_type_0, strides = var_3190_strides_0, weight = layers_16_fc2_outlier_module_weight_to_fp16_sparsified, x = input_135_cast_fp16)[name = tensor("op_3190_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = var_3184_cast_fp16, y = var_3190_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor var_3200 = const()[name = tensor("op_3200"), val = tensor(3)]; + tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; + tensor var_3219_to_fp16 = const()[name = tensor("op_3219_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3219_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_69_gamma_0_to_fp16 = const()[name = tensor("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233923712)))]; + tensor obj_69_beta_0_to_fp16 = const()[name = tensor("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233926336)))]; + tensor obj_69_epsilon_0_to_fp16 = const()[name = tensor("obj_69_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_3241_pad_type_0 = const()[name = tensor("op_3241_pad_type_0"), val = tensor("valid")]; + tensor var_3241_strides_0 = const()[name = tensor("op_3241_strides_0"), val = tensor([1, 1])]; + tensor var_3241_pad_0 = const()[name = tensor("op_3241_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3241_dilations_0 = const()[name = tensor("op_3241_dilations_0"), val = tensor([1, 1])]; + tensor var_3241_groups_0 = const()[name = tensor("op_3241_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233928960))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234748224))), name = tensor("layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_17_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234748352)))]; + tensor var_3241_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3241_dilations_0, groups = var_3241_groups_0, pad = var_3241_pad_0, pad_type = var_3241_pad_type_0, strides = var_3241_strides_0, weight = layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_3241_cast_fp16")]; + tensor var_3247_pad_type_0 = const()[name = tensor("op_3247_pad_type_0"), val = tensor("valid")]; + tensor var_3247_strides_0 = const()[name = tensor("op_3247_strides_0"), val = tensor([1, 1])]; + tensor var_3247_pad_0 = const()[name = tensor("op_3247_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3247_dilations_0 = const()[name = tensor("op_3247_dilations_0"), val = tensor([1, 1])]; + tensor var_3247_groups_0 = const()[name = tensor("op_3247_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234801152))), name = tensor("layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234750976))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3247_cast_fp16 = conv(dilations = var_3247_dilations_0, groups = var_3247_groups_0, pad = var_3247_pad_0, pad_type = var_3247_pad_type_0, strides = var_3247_strides_0, weight = layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_3247_cast_fp16")]; + tensor query_35_cast_fp16 = add(x = var_3241_cast_fp16, y = var_3247_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_3256_pad_type_0 = const()[name = tensor("op_3256_pad_type_0"), val = tensor("valid")]; + tensor var_3256_strides_0 = const()[name = tensor("op_3256_strides_0"), val = tensor([1, 1])]; + tensor var_3256_pad_0 = const()[name = tensor("op_3256_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3256_dilations_0 = const()[name = tensor("op_3256_dilations_0"), val = tensor([1, 1])]; + tensor var_3256_groups_0 = const()[name = tensor("op_3256_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235006016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235825280))), name = tensor("layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3256_cast_fp16 = conv(dilations = var_3256_dilations_0, groups = var_3256_groups_0, pad = var_3256_pad_0, pad_type = var_3256_pad_type_0, strides = var_3256_strides_0, weight = layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_3256_cast_fp16")]; + tensor var_3262_pad_type_0 = const()[name = tensor("op_3262_pad_type_0"), val = tensor("valid")]; + tensor var_3262_strides_0 = const()[name = tensor("op_3262_strides_0"), val = tensor([1, 1])]; + tensor var_3262_pad_0 = const()[name = tensor("op_3262_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3262_dilations_0 = const()[name = tensor("op_3262_dilations_0"), val = tensor([1, 1])]; + tensor var_3262_groups_0 = const()[name = tensor("op_3262_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235854656))), name = tensor("layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235825408))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3262_cast_fp16 = conv(dilations = var_3262_dilations_0, groups = var_3262_groups_0, pad = var_3262_pad_0, pad_type = var_3262_pad_type_0, strides = var_3262_strides_0, weight = layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_3262_cast_fp16")]; + tensor key_35_cast_fp16 = add(x = var_3256_cast_fp16, y = var_3262_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor var_3272_pad_type_0 = const()[name = tensor("op_3272_pad_type_0"), val = tensor("valid")]; + tensor var_3272_strides_0 = const()[name = tensor("op_3272_strides_0"), val = tensor([1, 1])]; + tensor var_3272_pad_0 = const()[name = tensor("op_3272_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3272_dilations_0 = const()[name = tensor("op_3272_dilations_0"), val = tensor([1, 1])]; + tensor var_3272_groups_0 = const()[name = tensor("op_3272_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236059520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236878784))), name = tensor("layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_17_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236878912)))]; + tensor var_3272_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3272_dilations_0, groups = var_3272_groups_0, pad = var_3272_pad_0, pad_type = var_3272_pad_type_0, strides = var_3272_strides_0, weight = layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_3272_cast_fp16")]; + tensor var_3278_pad_type_0 = const()[name = tensor("op_3278_pad_type_0"), val = tensor("valid")]; + tensor var_3278_strides_0 = const()[name = tensor("op_3278_strides_0"), val = tensor([1, 1])]; + tensor var_3278_pad_0 = const()[name = tensor("op_3278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3278_dilations_0 = const()[name = tensor("op_3278_dilations_0"), val = tensor([1, 1])]; + tensor var_3278_groups_0 = const()[name = tensor("op_3278_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236897920))), name = tensor("layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236881536))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3278_cast_fp16 = conv(dilations = var_3278_dilations_0, groups = var_3278_groups_0, pad = var_3278_pad_0, pad_type = var_3278_pad_type_0, strides = var_3278_strides_0, weight = layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_3278_cast_fp16")]; + tensor value_35_cast_fp16 = add(x = var_3272_cast_fp16, y = var_3278_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_3281 = const()[name = tensor("op_3281"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_3281, x = query_35_cast_fp16)[name = tensor("mh_q_35_cast_fp16")]; + tensor var_3283_to_fp16 = const()[name = tensor("op_3283_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3284_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_3283_to_fp16)[name = tensor("op_3284_cast_fp16")]; + tensor var_3285 = const()[name = tensor("op_3285"), val = tensor([1, 20, 64, -1])]; + tensor var_3286_cast_fp16 = reshape(shape = var_3285, x = key_35_cast_fp16)[name = tensor("op_3286_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_3284_cast_fp16, y = var_3286_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor var_3289_cast_fp16 = softmax(axis = var_3200, x = mh_w_35_cast_fp16)[name = tensor("op_3289_cast_fp16")]; + tensor var_3290 = const()[name = tensor("op_3290"), val = tensor([1, 20, 64, -1])]; + tensor var_3291_cast_fp16 = reshape(shape = var_3290, x = value_35_cast_fp16)[name = tensor("op_3291_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_3291_cast_fp16, y = var_3289_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_3294 = const()[name = tensor("op_3294"), val = tensor([1, 1280, 1, -1])]; + tensor input_137_cast_fp16 = reshape(shape = var_3294, x = attn_35_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor var_3304_pad_type_0 = const()[name = tensor("op_3304_pad_type_0"), val = tensor("valid")]; + tensor var_3304_strides_0 = const()[name = tensor("op_3304_strides_0"), val = tensor([1, 1])]; + tensor var_3304_pad_0 = const()[name = tensor("op_3304_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3304_dilations_0 = const()[name = tensor("op_3304_dilations_0"), val = tensor([1, 1])]; + tensor var_3304_groups_0 = const()[name = tensor("op_3304_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237102784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237922048))), name = tensor("layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_17_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237922176)))]; + tensor var_3304_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3304_dilations_0, groups = var_3304_groups_0, pad = var_3304_pad_0, pad_type = var_3304_pad_type_0, strides = var_3304_strides_0, weight = layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = tensor("op_3304_cast_fp16")]; + tensor var_3310_pad_type_0 = const()[name = tensor("op_3310_pad_type_0"), val = tensor("valid")]; + tensor var_3310_strides_0 = const()[name = tensor("op_3310_strides_0"), val = tensor([1, 1])]; + tensor var_3310_pad_0 = const()[name = tensor("op_3310_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3310_dilations_0 = const()[name = tensor("op_3310_dilations_0"), val = tensor([1, 1])]; + tensor var_3310_groups_0 = const()[name = tensor("op_3310_groups_0"), val = tensor(1)]; + tensor layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237940736))), name = tensor("layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237924800))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3310_cast_fp16 = conv(dilations = var_3310_dilations_0, groups = var_3310_groups_0, pad = var_3310_pad_0, pad_type = var_3310_pad_type_0, strides = var_3310_strides_0, weight = layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_137_cast_fp16)[name = tensor("op_3310_cast_fp16")]; + tensor obj_71_cast_fp16 = add(x = var_3304_cast_fp16, y = var_3310_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_71_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; + tensor var_3321_to_fp16 = const()[name = tensor("op_3321_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3321_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_139_gamma_0_to_fp16 = const()[name = tensor("input_139_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238145600)))]; + tensor input_139_beta_0_to_fp16 = const()[name = tensor("input_139_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238148224)))]; + tensor input_139_epsilon_0_to_fp16 = const()[name = tensor("input_139_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_139_cast_fp16 = batch_norm(beta = input_139_beta_0_to_fp16, epsilon = input_139_epsilon_0_to_fp16, gamma = input_139_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor var_3339_pad_type_0 = const()[name = tensor("op_3339_pad_type_0"), val = tensor("valid")]; + tensor var_3339_strides_0 = const()[name = tensor("op_3339_strides_0"), val = tensor([1, 1])]; + tensor var_3339_pad_0 = const()[name = tensor("op_3339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3339_dilations_0 = const()[name = tensor("op_3339_dilations_0"), val = tensor([1, 1])]; + tensor var_3339_groups_0 = const()[name = tensor("op_3339_groups_0"), val = tensor(1)]; + tensor layers_17_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238150848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241427712))), name = tensor("layers_17_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_17_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241427840)))]; + tensor var_3339_cast_fp16 = conv(bias = layers_17_fc1_inlier_module_bias_to_fp16, dilations = var_3339_dilations_0, groups = var_3339_groups_0, pad = var_3339_pad_0, pad_type = var_3339_pad_type_0, strides = var_3339_strides_0, weight = layers_17_fc1_inlier_module_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = tensor("op_3339_cast_fp16")]; + tensor var_3345_pad_type_0 = const()[name = tensor("op_3345_pad_type_0"), val = tensor("valid")]; + tensor var_3345_strides_0 = const()[name = tensor("op_3345_strides_0"), val = tensor([1, 1])]; + tensor var_3345_pad_0 = const()[name = tensor("op_3345_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3345_dilations_0 = const()[name = tensor("op_3345_dilations_0"), val = tensor([1, 1])]; + tensor var_3345_groups_0 = const()[name = tensor("op_3345_groups_0"), val = tensor(1)]; + tensor layers_17_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241516992))), name = tensor("layers_17_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241438144))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3345_cast_fp16 = conv(dilations = var_3345_dilations_0, groups = var_3345_groups_0, pad = var_3345_pad_0, pad_type = var_3345_pad_type_0, strides = var_3345_strides_0, weight = layers_17_fc1_outlier_module_weight_to_fp16_sparsified, x = input_139_cast_fp16)[name = tensor("op_3345_cast_fp16")]; + tensor input_141_cast_fp16 = add(x = var_3339_cast_fp16, y = var_3345_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor input_143_mode_0 = const()[name = tensor("input_143_mode_0"), val = tensor("EXACT")]; + tensor input_143_cast_fp16 = gelu(mode = input_143_mode_0, x = input_141_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor var_3356_pad_type_0 = const()[name = tensor("op_3356_pad_type_0"), val = tensor("valid")]; + tensor var_3356_strides_0 = const()[name = tensor("op_3356_strides_0"), val = tensor([1, 1])]; + tensor var_3356_pad_0 = const()[name = tensor("op_3356_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3356_dilations_0 = const()[name = tensor("op_3356_dilations_0"), val = tensor([1, 1])]; + tensor var_3356_groups_0 = const()[name = tensor("op_3356_groups_0"), val = tensor(1)]; + tensor layers_17_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242336256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245613120))), name = tensor("layers_17_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_17_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245613248)))]; + tensor var_3356_cast_fp16 = conv(bias = layers_17_fc2_inlier_module_bias_to_fp16, dilations = var_3356_dilations_0, groups = var_3356_groups_0, pad = var_3356_pad_0, pad_type = var_3356_pad_type_0, strides = var_3356_strides_0, weight = layers_17_fc2_inlier_module_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = tensor("op_3356_cast_fp16")]; + tensor var_3362_pad_type_0 = const()[name = tensor("op_3362_pad_type_0"), val = tensor("valid")]; + tensor var_3362_strides_0 = const()[name = tensor("op_3362_strides_0"), val = tensor([1, 1])]; + tensor var_3362_pad_0 = const()[name = tensor("op_3362_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3362_dilations_0 = const()[name = tensor("op_3362_dilations_0"), val = tensor([1, 1])]; + tensor var_3362_groups_0 = const()[name = tensor("op_3362_groups_0"), val = tensor(1)]; + tensor layers_17_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245717952))), name = tensor("layers_17_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245615872))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3362_cast_fp16 = conv(dilations = var_3362_dilations_0, groups = var_3362_groups_0, pad = var_3362_pad_0, pad_type = var_3362_pad_type_0, strides = var_3362_strides_0, weight = layers_17_fc2_outlier_module_weight_to_fp16_sparsified, x = input_143_cast_fp16)[name = tensor("op_3362_cast_fp16")]; + tensor hidden_states_39_cast_fp16 = add(x = var_3356_cast_fp16, y = var_3362_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; + tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; + tensor var_3372 = const()[name = tensor("op_3372"), val = tensor(3)]; + tensor out_73_axes_0 = const()[name = tensor("out_73_axes_0"), val = tensor([1])]; + tensor var_3391_to_fp16 = const()[name = tensor("op_3391_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_3391_to_fp16, x = inputs_73_cast_fp16)[name = tensor("out_73_cast_fp16")]; + tensor obj_73_gamma_0_to_fp16 = const()[name = tensor("obj_73_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246537216)))]; + tensor obj_73_beta_0_to_fp16 = const()[name = tensor("obj_73_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246539840)))]; + tensor obj_73_epsilon_0_to_fp16 = const()[name = tensor("obj_73_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_73_cast_fp16 = batch_norm(beta = obj_73_beta_0_to_fp16, epsilon = obj_73_epsilon_0_to_fp16, gamma = obj_73_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = tensor("obj_73_cast_fp16")]; + tensor var_3413_pad_type_0 = const()[name = tensor("op_3413_pad_type_0"), val = tensor("valid")]; + tensor var_3413_strides_0 = const()[name = tensor("op_3413_strides_0"), val = tensor([1, 1])]; + tensor var_3413_pad_0 = const()[name = tensor("op_3413_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3413_dilations_0 = const()[name = tensor("op_3413_dilations_0"), val = tensor([1, 1])]; + tensor var_3413_groups_0 = const()[name = tensor("op_3413_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246542464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247361728))), name = tensor("layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_18_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247361856)))]; + tensor var_3413_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3413_dilations_0, groups = var_3413_groups_0, pad = var_3413_pad_0, pad_type = var_3413_pad_type_0, strides = var_3413_strides_0, weight = layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_3413_cast_fp16")]; + tensor var_3419_pad_type_0 = const()[name = tensor("op_3419_pad_type_0"), val = tensor("valid")]; + tensor var_3419_strides_0 = const()[name = tensor("op_3419_strides_0"), val = tensor([1, 1])]; + tensor var_3419_pad_0 = const()[name = tensor("op_3419_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3419_dilations_0 = const()[name = tensor("op_3419_dilations_0"), val = tensor([1, 1])]; + tensor var_3419_groups_0 = const()[name = tensor("op_3419_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247417600))), name = tensor("layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247364480))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3419_cast_fp16 = conv(dilations = var_3419_dilations_0, groups = var_3419_groups_0, pad = var_3419_pad_0, pad_type = var_3419_pad_type_0, strides = var_3419_strides_0, weight = layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_3419_cast_fp16")]; + tensor query_37_cast_fp16 = add(x = var_3413_cast_fp16, y = var_3419_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_3428_pad_type_0 = const()[name = tensor("op_3428_pad_type_0"), val = tensor("valid")]; + tensor var_3428_strides_0 = const()[name = tensor("op_3428_strides_0"), val = tensor([1, 1])]; + tensor var_3428_pad_0 = const()[name = tensor("op_3428_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3428_dilations_0 = const()[name = tensor("op_3428_dilations_0"), val = tensor([1, 1])]; + tensor var_3428_groups_0 = const()[name = tensor("op_3428_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247622464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248441728))), name = tensor("layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3428_cast_fp16 = conv(dilations = var_3428_dilations_0, groups = var_3428_groups_0, pad = var_3428_pad_0, pad_type = var_3428_pad_type_0, strides = var_3428_strides_0, weight = layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_3428_cast_fp16")]; + tensor var_3434_pad_type_0 = const()[name = tensor("op_3434_pad_type_0"), val = tensor("valid")]; + tensor var_3434_strides_0 = const()[name = tensor("op_3434_strides_0"), val = tensor([1, 1])]; + tensor var_3434_pad_0 = const()[name = tensor("op_3434_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3434_dilations_0 = const()[name = tensor("op_3434_dilations_0"), val = tensor([1, 1])]; + tensor var_3434_groups_0 = const()[name = tensor("op_3434_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248472512))), name = tensor("layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248441856))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3434_cast_fp16 = conv(dilations = var_3434_dilations_0, groups = var_3434_groups_0, pad = var_3434_pad_0, pad_type = var_3434_pad_type_0, strides = var_3434_strides_0, weight = layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_3434_cast_fp16")]; + tensor key_37_cast_fp16 = add(x = var_3428_cast_fp16, y = var_3434_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_3444_pad_type_0 = const()[name = tensor("op_3444_pad_type_0"), val = tensor("valid")]; + tensor var_3444_strides_0 = const()[name = tensor("op_3444_strides_0"), val = tensor([1, 1])]; + tensor var_3444_pad_0 = const()[name = tensor("op_3444_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3444_dilations_0 = const()[name = tensor("op_3444_dilations_0"), val = tensor([1, 1])]; + tensor var_3444_groups_0 = const()[name = tensor("op_3444_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248677376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249496640))), name = tensor("layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_18_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249496768)))]; + tensor var_3444_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3444_dilations_0, groups = var_3444_groups_0, pad = var_3444_pad_0, pad_type = var_3444_pad_type_0, strides = var_3444_strides_0, weight = layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_3444_cast_fp16")]; + tensor var_3450_pad_type_0 = const()[name = tensor("op_3450_pad_type_0"), val = tensor("valid")]; + tensor var_3450_strides_0 = const()[name = tensor("op_3450_strides_0"), val = tensor([1, 1])]; + tensor var_3450_pad_0 = const()[name = tensor("op_3450_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3450_dilations_0 = const()[name = tensor("op_3450_dilations_0"), val = tensor([1, 1])]; + tensor var_3450_groups_0 = const()[name = tensor("op_3450_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249515520))), name = tensor("layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249499392))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3450_cast_fp16 = conv(dilations = var_3450_dilations_0, groups = var_3450_groups_0, pad = var_3450_pad_0, pad_type = var_3450_pad_type_0, strides = var_3450_strides_0, weight = layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_3450_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_3444_cast_fp16, y = var_3450_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_3453 = const()[name = tensor("op_3453"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_3453, x = query_37_cast_fp16)[name = tensor("mh_q_37_cast_fp16")]; + tensor var_3455_to_fp16 = const()[name = tensor("op_3455_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3456_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_3455_to_fp16)[name = tensor("op_3456_cast_fp16")]; + tensor var_3457 = const()[name = tensor("op_3457"), val = tensor([1, 20, 64, -1])]; + tensor var_3458_cast_fp16 = reshape(shape = var_3457, x = key_37_cast_fp16)[name = tensor("op_3458_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_3456_cast_fp16, y = var_3458_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor var_3461_cast_fp16 = softmax(axis = var_3372, x = mh_w_37_cast_fp16)[name = tensor("op_3461_cast_fp16")]; + tensor var_3462 = const()[name = tensor("op_3462"), val = tensor([1, 20, 64, -1])]; + tensor var_3463_cast_fp16 = reshape(shape = var_3462, x = value_37_cast_fp16)[name = tensor("op_3463_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_3463_cast_fp16, y = var_3461_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_3466 = const()[name = tensor("op_3466"), val = tensor([1, 1280, 1, -1])]; + tensor input_145_cast_fp16 = reshape(shape = var_3466, x = attn_37_cast_fp16)[name = tensor("input_145_cast_fp16")]; + tensor var_3476_pad_type_0 = const()[name = tensor("op_3476_pad_type_0"), val = tensor("valid")]; + tensor var_3476_strides_0 = const()[name = tensor("op_3476_strides_0"), val = tensor([1, 1])]; + tensor var_3476_pad_0 = const()[name = tensor("op_3476_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3476_dilations_0 = const()[name = tensor("op_3476_dilations_0"), val = tensor([1, 1])]; + tensor var_3476_groups_0 = const()[name = tensor("op_3476_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249720384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250539648))), name = tensor("layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_18_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250539776)))]; + tensor var_3476_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3476_dilations_0, groups = var_3476_groups_0, pad = var_3476_pad_0, pad_type = var_3476_pad_type_0, strides = var_3476_strides_0, weight = layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = tensor("op_3476_cast_fp16")]; + tensor var_3482_pad_type_0 = const()[name = tensor("op_3482_pad_type_0"), val = tensor("valid")]; + tensor var_3482_strides_0 = const()[name = tensor("op_3482_strides_0"), val = tensor([1, 1])]; + tensor var_3482_pad_0 = const()[name = tensor("op_3482_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3482_dilations_0 = const()[name = tensor("op_3482_dilations_0"), val = tensor([1, 1])]; + tensor var_3482_groups_0 = const()[name = tensor("op_3482_groups_0"), val = tensor(1)]; + tensor layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250558144))), name = tensor("layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250542400))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3482_cast_fp16 = conv(dilations = var_3482_dilations_0, groups = var_3482_groups_0, pad = var_3482_pad_0, pad_type = var_3482_pad_type_0, strides = var_3482_strides_0, weight = layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_145_cast_fp16)[name = tensor("op_3482_cast_fp16")]; + tensor obj_75_cast_fp16 = add(x = var_3476_cast_fp16, y = var_3482_cast_fp16)[name = tensor("obj_75_cast_fp16")]; + tensor inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_75_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; + tensor out_75_axes_0 = const()[name = tensor("out_75_axes_0"), val = tensor([1])]; + tensor var_3493_to_fp16 = const()[name = tensor("op_3493_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_75_cast_fp16 = layer_norm(axes = out_75_axes_0, epsilon = var_3493_to_fp16, x = inputs_75_cast_fp16)[name = tensor("out_75_cast_fp16")]; + tensor input_147_gamma_0_to_fp16 = const()[name = tensor("input_147_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250763008)))]; + tensor input_147_beta_0_to_fp16 = const()[name = tensor("input_147_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250765632)))]; + tensor input_147_epsilon_0_to_fp16 = const()[name = tensor("input_147_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_147_cast_fp16 = batch_norm(beta = input_147_beta_0_to_fp16, epsilon = input_147_epsilon_0_to_fp16, gamma = input_147_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = tensor("input_147_cast_fp16")]; + tensor var_3511_pad_type_0 = const()[name = tensor("op_3511_pad_type_0"), val = tensor("valid")]; + tensor var_3511_strides_0 = const()[name = tensor("op_3511_strides_0"), val = tensor([1, 1])]; + tensor var_3511_pad_0 = const()[name = tensor("op_3511_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3511_dilations_0 = const()[name = tensor("op_3511_dilations_0"), val = tensor([1, 1])]; + tensor var_3511_groups_0 = const()[name = tensor("op_3511_groups_0"), val = tensor(1)]; + tensor layers_18_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250768256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254045120))), name = tensor("layers_18_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_18_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254045248)))]; + tensor var_3511_cast_fp16 = conv(bias = layers_18_fc1_inlier_module_bias_to_fp16, dilations = var_3511_dilations_0, groups = var_3511_groups_0, pad = var_3511_pad_0, pad_type = var_3511_pad_type_0, strides = var_3511_strides_0, weight = layers_18_fc1_inlier_module_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = tensor("op_3511_cast_fp16")]; + tensor var_3517_pad_type_0 = const()[name = tensor("op_3517_pad_type_0"), val = tensor("valid")]; + tensor var_3517_strides_0 = const()[name = tensor("op_3517_strides_0"), val = tensor([1, 1])]; + tensor var_3517_pad_0 = const()[name = tensor("op_3517_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3517_dilations_0 = const()[name = tensor("op_3517_dilations_0"), val = tensor([1, 1])]; + tensor var_3517_groups_0 = const()[name = tensor("op_3517_groups_0"), val = tensor(1)]; + tensor layers_18_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254130816))), name = tensor("layers_18_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254055552))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3517_cast_fp16 = conv(dilations = var_3517_dilations_0, groups = var_3517_groups_0, pad = var_3517_pad_0, pad_type = var_3517_pad_type_0, strides = var_3517_strides_0, weight = layers_18_fc1_outlier_module_weight_to_fp16_sparsified, x = input_147_cast_fp16)[name = tensor("op_3517_cast_fp16")]; + tensor input_149_cast_fp16 = add(x = var_3511_cast_fp16, y = var_3517_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor input_151_mode_0 = const()[name = tensor("input_151_mode_0"), val = tensor("EXACT")]; + tensor input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor var_3528_pad_type_0 = const()[name = tensor("op_3528_pad_type_0"), val = tensor("valid")]; + tensor var_3528_strides_0 = const()[name = tensor("op_3528_strides_0"), val = tensor([1, 1])]; + tensor var_3528_pad_0 = const()[name = tensor("op_3528_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3528_dilations_0 = const()[name = tensor("op_3528_dilations_0"), val = tensor([1, 1])]; + tensor var_3528_groups_0 = const()[name = tensor("op_3528_groups_0"), val = tensor(1)]; + tensor layers_18_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254950080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258226944))), name = tensor("layers_18_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_18_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258227072)))]; + tensor var_3528_cast_fp16 = conv(bias = layers_18_fc2_inlier_module_bias_to_fp16, dilations = var_3528_dilations_0, groups = var_3528_groups_0, pad = var_3528_pad_0, pad_type = var_3528_pad_type_0, strides = var_3528_strides_0, weight = layers_18_fc2_inlier_module_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = tensor("op_3528_cast_fp16")]; + tensor var_3534_pad_type_0 = const()[name = tensor("op_3534_pad_type_0"), val = tensor("valid")]; + tensor var_3534_strides_0 = const()[name = tensor("op_3534_strides_0"), val = tensor([1, 1])]; + tensor var_3534_pad_0 = const()[name = tensor("op_3534_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3534_dilations_0 = const()[name = tensor("op_3534_dilations_0"), val = tensor([1, 1])]; + tensor var_3534_groups_0 = const()[name = tensor("op_3534_groups_0"), val = tensor(1)]; + tensor layers_18_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258318976))), name = tensor("layers_18_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258229696))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3534_cast_fp16 = conv(dilations = var_3534_dilations_0, groups = var_3534_groups_0, pad = var_3534_pad_0, pad_type = var_3534_pad_type_0, strides = var_3534_strides_0, weight = layers_18_fc2_outlier_module_weight_to_fp16_sparsified, x = input_151_cast_fp16)[name = tensor("op_3534_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = var_3528_cast_fp16, y = var_3534_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; + tensor var_3544 = const()[name = tensor("op_3544"), val = tensor(3)]; + tensor out_77_axes_0 = const()[name = tensor("out_77_axes_0"), val = tensor([1])]; + tensor var_3563_to_fp16 = const()[name = tensor("op_3563_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_77_cast_fp16 = layer_norm(axes = out_77_axes_0, epsilon = var_3563_to_fp16, x = inputs_77_cast_fp16)[name = tensor("out_77_cast_fp16")]; + tensor obj_77_gamma_0_to_fp16 = const()[name = tensor("obj_77_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259138240)))]; + tensor obj_77_beta_0_to_fp16 = const()[name = tensor("obj_77_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259140864)))]; + tensor obj_77_epsilon_0_to_fp16 = const()[name = tensor("obj_77_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_77_cast_fp16 = batch_norm(beta = obj_77_beta_0_to_fp16, epsilon = obj_77_epsilon_0_to_fp16, gamma = obj_77_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_77_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor var_3585_pad_type_0 = const()[name = tensor("op_3585_pad_type_0"), val = tensor("valid")]; + tensor var_3585_strides_0 = const()[name = tensor("op_3585_strides_0"), val = tensor([1, 1])]; + tensor var_3585_pad_0 = const()[name = tensor("op_3585_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3585_dilations_0 = const()[name = tensor("op_3585_dilations_0"), val = tensor([1, 1])]; + tensor var_3585_groups_0 = const()[name = tensor("op_3585_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259143488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259962752))), name = tensor("layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_19_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259962880)))]; + tensor var_3585_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3585_dilations_0, groups = var_3585_groups_0, pad = var_3585_pad_0, pad_type = var_3585_pad_type_0, strides = var_3585_strides_0, weight = layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_3585_cast_fp16")]; + tensor var_3591_pad_type_0 = const()[name = tensor("op_3591_pad_type_0"), val = tensor("valid")]; + tensor var_3591_strides_0 = const()[name = tensor("op_3591_strides_0"), val = tensor([1, 1])]; + tensor var_3591_pad_0 = const()[name = tensor("op_3591_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3591_dilations_0 = const()[name = tensor("op_3591_dilations_0"), val = tensor([1, 1])]; + tensor var_3591_groups_0 = const()[name = tensor("op_3591_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260020736))), name = tensor("layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259965504))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3591_cast_fp16 = conv(dilations = var_3591_dilations_0, groups = var_3591_groups_0, pad = var_3591_pad_0, pad_type = var_3591_pad_type_0, strides = var_3591_strides_0, weight = layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_3591_cast_fp16")]; + tensor query_39_cast_fp16 = add(x = var_3585_cast_fp16, y = var_3591_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_3600_pad_type_0 = const()[name = tensor("op_3600_pad_type_0"), val = tensor("valid")]; + tensor var_3600_strides_0 = const()[name = tensor("op_3600_strides_0"), val = tensor([1, 1])]; + tensor var_3600_pad_0 = const()[name = tensor("op_3600_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3600_dilations_0 = const()[name = tensor("op_3600_dilations_0"), val = tensor([1, 1])]; + tensor var_3600_groups_0 = const()[name = tensor("op_3600_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260225600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261044864))), name = tensor("layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3600_cast_fp16 = conv(dilations = var_3600_dilations_0, groups = var_3600_groups_0, pad = var_3600_pad_0, pad_type = var_3600_pad_type_0, strides = var_3600_strides_0, weight = layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_3600_cast_fp16")]; + tensor var_3606_pad_type_0 = const()[name = tensor("op_3606_pad_type_0"), val = tensor("valid")]; + tensor var_3606_strides_0 = const()[name = tensor("op_3606_strides_0"), val = tensor([1, 1])]; + tensor var_3606_pad_0 = const()[name = tensor("op_3606_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3606_dilations_0 = const()[name = tensor("op_3606_dilations_0"), val = tensor([1, 1])]; + tensor var_3606_groups_0 = const()[name = tensor("op_3606_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261079936))), name = tensor("layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261044992))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3606_cast_fp16 = conv(dilations = var_3606_dilations_0, groups = var_3606_groups_0, pad = var_3606_pad_0, pad_type = var_3606_pad_type_0, strides = var_3606_strides_0, weight = layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_3606_cast_fp16")]; + tensor key_39_cast_fp16 = add(x = var_3600_cast_fp16, y = var_3606_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor var_3616_pad_type_0 = const()[name = tensor("op_3616_pad_type_0"), val = tensor("valid")]; + tensor var_3616_strides_0 = const()[name = tensor("op_3616_strides_0"), val = tensor([1, 1])]; + tensor var_3616_pad_0 = const()[name = tensor("op_3616_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3616_dilations_0 = const()[name = tensor("op_3616_dilations_0"), val = tensor([1, 1])]; + tensor var_3616_groups_0 = const()[name = tensor("op_3616_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261284800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262104064))), name = tensor("layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_19_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262104192)))]; + tensor var_3616_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3616_dilations_0, groups = var_3616_groups_0, pad = var_3616_pad_0, pad_type = var_3616_pad_type_0, strides = var_3616_strides_0, weight = layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_3616_cast_fp16")]; + tensor var_3622_pad_type_0 = const()[name = tensor("op_3622_pad_type_0"), val = tensor("valid")]; + tensor var_3622_strides_0 = const()[name = tensor("op_3622_strides_0"), val = tensor([1, 1])]; + tensor var_3622_pad_0 = const()[name = tensor("op_3622_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3622_dilations_0 = const()[name = tensor("op_3622_dilations_0"), val = tensor([1, 1])]; + tensor var_3622_groups_0 = const()[name = tensor("op_3622_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262121984))), name = tensor("layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262106816))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3622_cast_fp16 = conv(dilations = var_3622_dilations_0, groups = var_3622_groups_0, pad = var_3622_pad_0, pad_type = var_3622_pad_type_0, strides = var_3622_strides_0, weight = layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_3622_cast_fp16")]; + tensor value_39_cast_fp16 = add(x = var_3616_cast_fp16, y = var_3622_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_3625 = const()[name = tensor("op_3625"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_3625, x = query_39_cast_fp16)[name = tensor("mh_q_39_cast_fp16")]; + tensor var_3627_to_fp16 = const()[name = tensor("op_3627_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3628_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_3627_to_fp16)[name = tensor("op_3628_cast_fp16")]; + tensor var_3629 = const()[name = tensor("op_3629"), val = tensor([1, 20, 64, -1])]; + tensor var_3630_cast_fp16 = reshape(shape = var_3629, x = key_39_cast_fp16)[name = tensor("op_3630_cast_fp16")]; + tensor mh_w_39_transpose_x_0 = const()[name = tensor("mh_w_39_transpose_x_0"), val = tensor(true)]; + tensor mh_w_39_transpose_y_0 = const()[name = tensor("mh_w_39_transpose_y_0"), val = tensor(false)]; + tensor mh_w_39_cast_fp16 = matmul(transpose_x = mh_w_39_transpose_x_0, transpose_y = mh_w_39_transpose_y_0, x = var_3628_cast_fp16, y = var_3630_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor var_3633_cast_fp16 = softmax(axis = var_3544, x = mh_w_39_cast_fp16)[name = tensor("op_3633_cast_fp16")]; + tensor var_3634 = const()[name = tensor("op_3634"), val = tensor([1, 20, 64, -1])]; + tensor var_3635_cast_fp16 = reshape(shape = var_3634, x = value_39_cast_fp16)[name = tensor("op_3635_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_3635_cast_fp16, y = var_3633_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_3638 = const()[name = tensor("op_3638"), val = tensor([1, 1280, 1, -1])]; + tensor input_153_cast_fp16 = reshape(shape = var_3638, x = attn_39_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor var_3648_pad_type_0 = const()[name = tensor("op_3648_pad_type_0"), val = tensor("valid")]; + tensor var_3648_strides_0 = const()[name = tensor("op_3648_strides_0"), val = tensor([1, 1])]; + tensor var_3648_pad_0 = const()[name = tensor("op_3648_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3648_dilations_0 = const()[name = tensor("op_3648_dilations_0"), val = tensor([1, 1])]; + tensor var_3648_groups_0 = const()[name = tensor("op_3648_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262326848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263146112))), name = tensor("layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_19_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263146240)))]; + tensor var_3648_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3648_dilations_0, groups = var_3648_groups_0, pad = var_3648_pad_0, pad_type = var_3648_pad_type_0, strides = var_3648_strides_0, weight = layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = tensor("op_3648_cast_fp16")]; + tensor var_3654_pad_type_0 = const()[name = tensor("op_3654_pad_type_0"), val = tensor("valid")]; + tensor var_3654_strides_0 = const()[name = tensor("op_3654_strides_0"), val = tensor([1, 1])]; + tensor var_3654_pad_0 = const()[name = tensor("op_3654_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3654_dilations_0 = const()[name = tensor("op_3654_dilations_0"), val = tensor([1, 1])]; + tensor var_3654_groups_0 = const()[name = tensor("op_3654_groups_0"), val = tensor(1)]; + tensor layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263162496))), name = tensor("layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263148864))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3654_cast_fp16 = conv(dilations = var_3654_dilations_0, groups = var_3654_groups_0, pad = var_3654_pad_0, pad_type = var_3654_pad_type_0, strides = var_3654_strides_0, weight = layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_153_cast_fp16)[name = tensor("op_3654_cast_fp16")]; + tensor obj_79_cast_fp16 = add(x = var_3648_cast_fp16, y = var_3654_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = obj_79_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; + tensor out_79_axes_0 = const()[name = tensor("out_79_axes_0"), val = tensor([1])]; + tensor var_3665_to_fp16 = const()[name = tensor("op_3665_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_3665_to_fp16, x = inputs_79_cast_fp16)[name = tensor("out_79_cast_fp16")]; + tensor input_155_gamma_0_to_fp16 = const()[name = tensor("input_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263367360)))]; + tensor input_155_beta_0_to_fp16 = const()[name = tensor("input_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263369984)))]; + tensor input_155_epsilon_0_to_fp16 = const()[name = tensor("input_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor var_3683_pad_type_0 = const()[name = tensor("op_3683_pad_type_0"), val = tensor("valid")]; + tensor var_3683_strides_0 = const()[name = tensor("op_3683_strides_0"), val = tensor([1, 1])]; + tensor var_3683_pad_0 = const()[name = tensor("op_3683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3683_dilations_0 = const()[name = tensor("op_3683_dilations_0"), val = tensor([1, 1])]; + tensor var_3683_groups_0 = const()[name = tensor("op_3683_groups_0"), val = tensor(1)]; + tensor layers_19_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263372608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266649472))), name = tensor("layers_19_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_19_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266649600)))]; + tensor var_3683_cast_fp16 = conv(bias = layers_19_fc1_inlier_module_bias_to_fp16, dilations = var_3683_dilations_0, groups = var_3683_groups_0, pad = var_3683_pad_0, pad_type = var_3683_pad_type_0, strides = var_3683_strides_0, weight = layers_19_fc1_inlier_module_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = tensor("op_3683_cast_fp16")]; + tensor var_3689_pad_type_0 = const()[name = tensor("op_3689_pad_type_0"), val = tensor("valid")]; + tensor var_3689_strides_0 = const()[name = tensor("op_3689_strides_0"), val = tensor([1, 1])]; + tensor var_3689_pad_0 = const()[name = tensor("op_3689_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3689_dilations_0 = const()[name = tensor("op_3689_dilations_0"), val = tensor([1, 1])]; + tensor var_3689_groups_0 = const()[name = tensor("op_3689_groups_0"), val = tensor(1)]; + tensor layers_19_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266736192))), name = tensor("layers_19_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266659904))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3689_cast_fp16 = conv(dilations = var_3689_dilations_0, groups = var_3689_groups_0, pad = var_3689_pad_0, pad_type = var_3689_pad_type_0, strides = var_3689_strides_0, weight = layers_19_fc1_outlier_module_weight_to_fp16_sparsified, x = input_155_cast_fp16)[name = tensor("op_3689_cast_fp16")]; + tensor input_157_cast_fp16 = add(x = var_3683_cast_fp16, y = var_3689_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; + tensor input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; + tensor var_3700_pad_type_0 = const()[name = tensor("op_3700_pad_type_0"), val = tensor("valid")]; + tensor var_3700_strides_0 = const()[name = tensor("op_3700_strides_0"), val = tensor([1, 1])]; + tensor var_3700_pad_0 = const()[name = tensor("op_3700_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3700_dilations_0 = const()[name = tensor("op_3700_dilations_0"), val = tensor([1, 1])]; + tensor var_3700_groups_0 = const()[name = tensor("op_3700_groups_0"), val = tensor(1)]; + tensor layers_19_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(267555456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270832320))), name = tensor("layers_19_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_19_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270832448)))]; + tensor var_3700_cast_fp16 = conv(bias = layers_19_fc2_inlier_module_bias_to_fp16, dilations = var_3700_dilations_0, groups = var_3700_groups_0, pad = var_3700_pad_0, pad_type = var_3700_pad_type_0, strides = var_3700_strides_0, weight = layers_19_fc2_inlier_module_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = tensor("op_3700_cast_fp16")]; + tensor var_3706_pad_type_0 = const()[name = tensor("op_3706_pad_type_0"), val = tensor("valid")]; + tensor var_3706_strides_0 = const()[name = tensor("op_3706_strides_0"), val = tensor([1, 1])]; + tensor var_3706_pad_0 = const()[name = tensor("op_3706_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3706_dilations_0 = const()[name = tensor("op_3706_dilations_0"), val = tensor([1, 1])]; + tensor var_3706_groups_0 = const()[name = tensor("op_3706_groups_0"), val = tensor(1)]; + tensor layers_19_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270911040))), name = tensor("layers_19_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270835072))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3706_cast_fp16 = conv(dilations = var_3706_dilations_0, groups = var_3706_groups_0, pad = var_3706_pad_0, pad_type = var_3706_pad_type_0, strides = var_3706_strides_0, weight = layers_19_fc2_outlier_module_weight_to_fp16_sparsified, x = input_159_cast_fp16)[name = tensor("op_3706_cast_fp16")]; + tensor hidden_states_43_cast_fp16 = add(x = var_3700_cast_fp16, y = var_3706_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; + tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; + tensor var_3716 = const()[name = tensor("op_3716"), val = tensor(3)]; + tensor out_81_axes_0 = const()[name = tensor("out_81_axes_0"), val = tensor([1])]; + tensor var_3735_to_fp16 = const()[name = tensor("op_3735_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_81_cast_fp16 = layer_norm(axes = out_81_axes_0, epsilon = var_3735_to_fp16, x = inputs_81_cast_fp16)[name = tensor("out_81_cast_fp16")]; + tensor obj_81_gamma_0_to_fp16 = const()[name = tensor("obj_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271730304)))]; + tensor obj_81_beta_0_to_fp16 = const()[name = tensor("obj_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271732928)))]; + tensor obj_81_epsilon_0_to_fp16 = const()[name = tensor("obj_81_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_81_cast_fp16 = batch_norm(beta = obj_81_beta_0_to_fp16, epsilon = obj_81_epsilon_0_to_fp16, gamma = obj_81_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor var_3757_pad_type_0 = const()[name = tensor("op_3757_pad_type_0"), val = tensor("valid")]; + tensor var_3757_strides_0 = const()[name = tensor("op_3757_strides_0"), val = tensor([1, 1])]; + tensor var_3757_pad_0 = const()[name = tensor("op_3757_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3757_dilations_0 = const()[name = tensor("op_3757_dilations_0"), val = tensor([1, 1])]; + tensor var_3757_groups_0 = const()[name = tensor("op_3757_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271735552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272554816))), name = tensor("layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_20_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272554944)))]; + tensor var_3757_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3757_dilations_0, groups = var_3757_groups_0, pad = var_3757_pad_0, pad_type = var_3757_pad_type_0, strides = var_3757_strides_0, weight = layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_3757_cast_fp16")]; + tensor var_3763_pad_type_0 = const()[name = tensor("op_3763_pad_type_0"), val = tensor("valid")]; + tensor var_3763_strides_0 = const()[name = tensor("op_3763_strides_0"), val = tensor([1, 1])]; + tensor var_3763_pad_0 = const()[name = tensor("op_3763_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3763_dilations_0 = const()[name = tensor("op_3763_dilations_0"), val = tensor([1, 1])]; + tensor var_3763_groups_0 = const()[name = tensor("op_3763_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272594048))), name = tensor("layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272557568))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3763_cast_fp16 = conv(dilations = var_3763_dilations_0, groups = var_3763_groups_0, pad = var_3763_pad_0, pad_type = var_3763_pad_type_0, strides = var_3763_strides_0, weight = layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_3763_cast_fp16")]; + tensor query_41_cast_fp16 = add(x = var_3757_cast_fp16, y = var_3763_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_3772_pad_type_0 = const()[name = tensor("op_3772_pad_type_0"), val = tensor("valid")]; + tensor var_3772_strides_0 = const()[name = tensor("op_3772_strides_0"), val = tensor([1, 1])]; + tensor var_3772_pad_0 = const()[name = tensor("op_3772_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3772_dilations_0 = const()[name = tensor("op_3772_dilations_0"), val = tensor([1, 1])]; + tensor var_3772_groups_0 = const()[name = tensor("op_3772_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272798912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273618176))), name = tensor("layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3772_cast_fp16 = conv(dilations = var_3772_dilations_0, groups = var_3772_groups_0, pad = var_3772_pad_0, pad_type = var_3772_pad_type_0, strides = var_3772_strides_0, weight = layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_3772_cast_fp16")]; + tensor var_3778_pad_type_0 = const()[name = tensor("op_3778_pad_type_0"), val = tensor("valid")]; + tensor var_3778_strides_0 = const()[name = tensor("op_3778_strides_0"), val = tensor([1, 1])]; + tensor var_3778_pad_0 = const()[name = tensor("op_3778_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3778_dilations_0 = const()[name = tensor("op_3778_dilations_0"), val = tensor([1, 1])]; + tensor var_3778_groups_0 = const()[name = tensor("op_3778_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273644416))), name = tensor("layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273618304))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3778_cast_fp16 = conv(dilations = var_3778_dilations_0, groups = var_3778_groups_0, pad = var_3778_pad_0, pad_type = var_3778_pad_type_0, strides = var_3778_strides_0, weight = layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_3778_cast_fp16")]; + tensor key_41_cast_fp16 = add(x = var_3772_cast_fp16, y = var_3778_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_3788_pad_type_0 = const()[name = tensor("op_3788_pad_type_0"), val = tensor("valid")]; + tensor var_3788_strides_0 = const()[name = tensor("op_3788_strides_0"), val = tensor([1, 1])]; + tensor var_3788_pad_0 = const()[name = tensor("op_3788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3788_dilations_0 = const()[name = tensor("op_3788_dilations_0"), val = tensor([1, 1])]; + tensor var_3788_groups_0 = const()[name = tensor("op_3788_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273849280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274668544))), name = tensor("layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_20_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274668672)))]; + tensor var_3788_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3788_dilations_0, groups = var_3788_groups_0, pad = var_3788_pad_0, pad_type = var_3788_pad_type_0, strides = var_3788_strides_0, weight = layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_3788_cast_fp16")]; + tensor var_3794_pad_type_0 = const()[name = tensor("op_3794_pad_type_0"), val = tensor("valid")]; + tensor var_3794_strides_0 = const()[name = tensor("op_3794_strides_0"), val = tensor([1, 1])]; + tensor var_3794_pad_0 = const()[name = tensor("op_3794_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3794_dilations_0 = const()[name = tensor("op_3794_dilations_0"), val = tensor([1, 1])]; + tensor var_3794_groups_0 = const()[name = tensor("op_3794_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274685312))), name = tensor("layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274671296))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3794_cast_fp16 = conv(dilations = var_3794_dilations_0, groups = var_3794_groups_0, pad = var_3794_pad_0, pad_type = var_3794_pad_type_0, strides = var_3794_strides_0, weight = layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_3794_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_3788_cast_fp16, y = var_3794_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_3797 = const()[name = tensor("op_3797"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_3797, x = query_41_cast_fp16)[name = tensor("mh_q_41_cast_fp16")]; + tensor var_3799_to_fp16 = const()[name = tensor("op_3799_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3800_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3799_to_fp16)[name = tensor("op_3800_cast_fp16")]; + tensor var_3801 = const()[name = tensor("op_3801"), val = tensor([1, 20, 64, -1])]; + tensor var_3802_cast_fp16 = reshape(shape = var_3801, x = key_41_cast_fp16)[name = tensor("op_3802_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_3800_cast_fp16, y = var_3802_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor var_3805_cast_fp16 = softmax(axis = var_3716, x = mh_w_41_cast_fp16)[name = tensor("op_3805_cast_fp16")]; + tensor var_3806 = const()[name = tensor("op_3806"), val = tensor([1, 20, 64, -1])]; + tensor var_3807_cast_fp16 = reshape(shape = var_3806, x = value_41_cast_fp16)[name = tensor("op_3807_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3807_cast_fp16, y = var_3805_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_3810 = const()[name = tensor("op_3810"), val = tensor([1, 1280, 1, -1])]; + tensor input_161_cast_fp16 = reshape(shape = var_3810, x = attn_41_cast_fp16)[name = tensor("input_161_cast_fp16")]; + tensor var_3820_pad_type_0 = const()[name = tensor("op_3820_pad_type_0"), val = tensor("valid")]; + tensor var_3820_strides_0 = const()[name = tensor("op_3820_strides_0"), val = tensor([1, 1])]; + tensor var_3820_pad_0 = const()[name = tensor("op_3820_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3820_dilations_0 = const()[name = tensor("op_3820_dilations_0"), val = tensor([1, 1])]; + tensor var_3820_groups_0 = const()[name = tensor("op_3820_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274890176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275709440))), name = tensor("layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_20_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275709568)))]; + tensor var_3820_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3820_dilations_0, groups = var_3820_groups_0, pad = var_3820_pad_0, pad_type = var_3820_pad_type_0, strides = var_3820_strides_0, weight = layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = tensor("op_3820_cast_fp16")]; + tensor var_3826_pad_type_0 = const()[name = tensor("op_3826_pad_type_0"), val = tensor("valid")]; + tensor var_3826_strides_0 = const()[name = tensor("op_3826_strides_0"), val = tensor([1, 1])]; + tensor var_3826_pad_0 = const()[name = tensor("op_3826_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3826_dilations_0 = const()[name = tensor("op_3826_dilations_0"), val = tensor([1, 1])]; + tensor var_3826_groups_0 = const()[name = tensor("op_3826_groups_0"), val = tensor(1)]; + tensor layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275726912))), name = tensor("layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275712192))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3826_cast_fp16 = conv(dilations = var_3826_dilations_0, groups = var_3826_groups_0, pad = var_3826_pad_0, pad_type = var_3826_pad_type_0, strides = var_3826_strides_0, weight = layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_161_cast_fp16)[name = tensor("op_3826_cast_fp16")]; + tensor obj_83_cast_fp16 = add(x = var_3820_cast_fp16, y = var_3826_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_83_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; + tensor out_83_axes_0 = const()[name = tensor("out_83_axes_0"), val = tensor([1])]; + tensor var_3837_to_fp16 = const()[name = tensor("op_3837_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_83_cast_fp16 = layer_norm(axes = out_83_axes_0, epsilon = var_3837_to_fp16, x = inputs_83_cast_fp16)[name = tensor("out_83_cast_fp16")]; + tensor input_163_gamma_0_to_fp16 = const()[name = tensor("input_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275931776)))]; + tensor input_163_beta_0_to_fp16 = const()[name = tensor("input_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275934400)))]; + tensor input_163_epsilon_0_to_fp16 = const()[name = tensor("input_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_163_cast_fp16 = batch_norm(beta = input_163_beta_0_to_fp16, epsilon = input_163_epsilon_0_to_fp16, gamma = input_163_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor var_3855_pad_type_0 = const()[name = tensor("op_3855_pad_type_0"), val = tensor("valid")]; + tensor var_3855_strides_0 = const()[name = tensor("op_3855_strides_0"), val = tensor([1, 1])]; + tensor var_3855_pad_0 = const()[name = tensor("op_3855_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3855_dilations_0 = const()[name = tensor("op_3855_dilations_0"), val = tensor([1, 1])]; + tensor var_3855_groups_0 = const()[name = tensor("op_3855_groups_0"), val = tensor(1)]; + tensor layers_20_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275937024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279213888))), name = tensor("layers_20_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_20_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279214016)))]; + tensor var_3855_cast_fp16 = conv(bias = layers_20_fc1_inlier_module_bias_to_fp16, dilations = var_3855_dilations_0, groups = var_3855_groups_0, pad = var_3855_pad_0, pad_type = var_3855_pad_type_0, strides = var_3855_strides_0, weight = layers_20_fc1_inlier_module_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = tensor("op_3855_cast_fp16")]; + tensor var_3861_pad_type_0 = const()[name = tensor("op_3861_pad_type_0"), val = tensor("valid")]; + tensor var_3861_strides_0 = const()[name = tensor("op_3861_strides_0"), val = tensor([1, 1])]; + tensor var_3861_pad_0 = const()[name = tensor("op_3861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3861_dilations_0 = const()[name = tensor("op_3861_dilations_0"), val = tensor([1, 1])]; + tensor var_3861_groups_0 = const()[name = tensor("op_3861_groups_0"), val = tensor(1)]; + tensor layers_20_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279311872))), name = tensor("layers_20_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279224320))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_3861_cast_fp16 = conv(dilations = var_3861_dilations_0, groups = var_3861_groups_0, pad = var_3861_pad_0, pad_type = var_3861_pad_type_0, strides = var_3861_strides_0, weight = layers_20_fc1_outlier_module_weight_to_fp16_sparsified, x = input_163_cast_fp16)[name = tensor("op_3861_cast_fp16")]; + tensor input_165_cast_fp16 = add(x = var_3855_cast_fp16, y = var_3861_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor input_167_mode_0 = const()[name = tensor("input_167_mode_0"), val = tensor("EXACT")]; + tensor input_167_cast_fp16 = gelu(mode = input_167_mode_0, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor var_3872_pad_type_0 = const()[name = tensor("op_3872_pad_type_0"), val = tensor("valid")]; + tensor var_3872_strides_0 = const()[name = tensor("op_3872_strides_0"), val = tensor([1, 1])]; + tensor var_3872_pad_0 = const()[name = tensor("op_3872_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3872_dilations_0 = const()[name = tensor("op_3872_dilations_0"), val = tensor([1, 1])]; + tensor var_3872_groups_0 = const()[name = tensor("op_3872_groups_0"), val = tensor(1)]; + tensor layers_20_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(280131136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283408000))), name = tensor("layers_20_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_20_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283408128)))]; + tensor var_3872_cast_fp16 = conv(bias = layers_20_fc2_inlier_module_bias_to_fp16, dilations = var_3872_dilations_0, groups = var_3872_groups_0, pad = var_3872_pad_0, pad_type = var_3872_pad_type_0, strides = var_3872_strides_0, weight = layers_20_fc2_inlier_module_weight_to_fp16_palettized, x = input_167_cast_fp16)[name = tensor("op_3872_cast_fp16")]; + tensor var_3878_pad_type_0 = const()[name = tensor("op_3878_pad_type_0"), val = tensor("valid")]; + tensor var_3878_strides_0 = const()[name = tensor("op_3878_strides_0"), val = tensor([1, 1])]; + tensor var_3878_pad_0 = const()[name = tensor("op_3878_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3878_dilations_0 = const()[name = tensor("op_3878_dilations_0"), val = tensor([1, 1])]; + tensor var_3878_groups_0 = const()[name = tensor("op_3878_groups_0"), val = tensor(1)]; + tensor layers_20_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283466304))), name = tensor("layers_20_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283410752))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_3878_cast_fp16 = conv(dilations = var_3878_dilations_0, groups = var_3878_groups_0, pad = var_3878_pad_0, pad_type = var_3878_pad_type_0, strides = var_3878_strides_0, weight = layers_20_fc2_outlier_module_weight_to_fp16_sparsified, x = input_167_cast_fp16)[name = tensor("op_3878_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = var_3872_cast_fp16, y = var_3878_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_45_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; + tensor var_3888 = const()[name = tensor("op_3888"), val = tensor(3)]; + tensor out_85_axes_0 = const()[name = tensor("out_85_axes_0"), val = tensor([1])]; + tensor var_3907_to_fp16 = const()[name = tensor("op_3907_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_3907_to_fp16, x = inputs_85_cast_fp16)[name = tensor("out_85_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284285568)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284288192)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor var_3929_pad_type_0 = const()[name = tensor("op_3929_pad_type_0"), val = tensor("valid")]; + tensor var_3929_strides_0 = const()[name = tensor("op_3929_strides_0"), val = tensor([1, 1])]; + tensor var_3929_pad_0 = const()[name = tensor("op_3929_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3929_dilations_0 = const()[name = tensor("op_3929_dilations_0"), val = tensor([1, 1])]; + tensor var_3929_groups_0 = const()[name = tensor("op_3929_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284290816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285110080))), name = tensor("layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_21_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285110208)))]; + tensor var_3929_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3929_dilations_0, groups = var_3929_groups_0, pad = var_3929_pad_0, pad_type = var_3929_pad_type_0, strides = var_3929_strides_0, weight = layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_3929_cast_fp16")]; + tensor var_3935_pad_type_0 = const()[name = tensor("op_3935_pad_type_0"), val = tensor("valid")]; + tensor var_3935_strides_0 = const()[name = tensor("op_3935_strides_0"), val = tensor([1, 1])]; + tensor var_3935_pad_0 = const()[name = tensor("op_3935_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3935_dilations_0 = const()[name = tensor("op_3935_dilations_0"), val = tensor([1, 1])]; + tensor var_3935_groups_0 = const()[name = tensor("op_3935_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285140544))), name = tensor("layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285112832))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3935_cast_fp16 = conv(dilations = var_3935_dilations_0, groups = var_3935_groups_0, pad = var_3935_pad_0, pad_type = var_3935_pad_type_0, strides = var_3935_strides_0, weight = layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_3935_cast_fp16")]; + tensor query_43_cast_fp16 = add(x = var_3929_cast_fp16, y = var_3935_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_3944_pad_type_0 = const()[name = tensor("op_3944_pad_type_0"), val = tensor("valid")]; + tensor var_3944_strides_0 = const()[name = tensor("op_3944_strides_0"), val = tensor([1, 1])]; + tensor var_3944_pad_0 = const()[name = tensor("op_3944_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3944_dilations_0 = const()[name = tensor("op_3944_dilations_0"), val = tensor([1, 1])]; + tensor var_3944_groups_0 = const()[name = tensor("op_3944_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285345408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286164672))), name = tensor("layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3944_cast_fp16 = conv(dilations = var_3944_dilations_0, groups = var_3944_groups_0, pad = var_3944_pad_0, pad_type = var_3944_pad_type_0, strides = var_3944_strides_0, weight = layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_3944_cast_fp16")]; + tensor var_3950_pad_type_0 = const()[name = tensor("op_3950_pad_type_0"), val = tensor("valid")]; + tensor var_3950_strides_0 = const()[name = tensor("op_3950_strides_0"), val = tensor([1, 1])]; + tensor var_3950_pad_0 = const()[name = tensor("op_3950_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3950_dilations_0 = const()[name = tensor("op_3950_dilations_0"), val = tensor([1, 1])]; + tensor var_3950_groups_0 = const()[name = tensor("op_3950_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286193792))), name = tensor("layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286164800))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3950_cast_fp16 = conv(dilations = var_3950_dilations_0, groups = var_3950_groups_0, pad = var_3950_pad_0, pad_type = var_3950_pad_type_0, strides = var_3950_strides_0, weight = layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_3950_cast_fp16")]; + tensor key_43_cast_fp16 = add(x = var_3944_cast_fp16, y = var_3950_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor var_3960_pad_type_0 = const()[name = tensor("op_3960_pad_type_0"), val = tensor("valid")]; + tensor var_3960_strides_0 = const()[name = tensor("op_3960_strides_0"), val = tensor([1, 1])]; + tensor var_3960_pad_0 = const()[name = tensor("op_3960_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3960_dilations_0 = const()[name = tensor("op_3960_dilations_0"), val = tensor([1, 1])]; + tensor var_3960_groups_0 = const()[name = tensor("op_3960_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286398656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287217920))), name = tensor("layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_21_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287218048)))]; + tensor var_3960_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3960_dilations_0, groups = var_3960_groups_0, pad = var_3960_pad_0, pad_type = var_3960_pad_type_0, strides = var_3960_strides_0, weight = layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_3960_cast_fp16")]; + tensor var_3966_pad_type_0 = const()[name = tensor("op_3966_pad_type_0"), val = tensor("valid")]; + tensor var_3966_strides_0 = const()[name = tensor("op_3966_strides_0"), val = tensor([1, 1])]; + tensor var_3966_pad_0 = const()[name = tensor("op_3966_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3966_dilations_0 = const()[name = tensor("op_3966_dilations_0"), val = tensor([1, 1])]; + tensor var_3966_groups_0 = const()[name = tensor("op_3966_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287233472))), name = tensor("layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287220672))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3966_cast_fp16 = conv(dilations = var_3966_dilations_0, groups = var_3966_groups_0, pad = var_3966_pad_0, pad_type = var_3966_pad_type_0, strides = var_3966_strides_0, weight = layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_3966_cast_fp16")]; + tensor value_43_cast_fp16 = add(x = var_3960_cast_fp16, y = var_3966_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_3969 = const()[name = tensor("op_3969"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_3969, x = query_43_cast_fp16)[name = tensor("mh_q_43_cast_fp16")]; + tensor var_3971_to_fp16 = const()[name = tensor("op_3971_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3972_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3971_to_fp16)[name = tensor("op_3972_cast_fp16")]; + tensor var_3973 = const()[name = tensor("op_3973"), val = tensor([1, 20, 64, -1])]; + tensor var_3974_cast_fp16 = reshape(shape = var_3973, x = key_43_cast_fp16)[name = tensor("op_3974_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_3972_cast_fp16, y = var_3974_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor var_3977_cast_fp16 = softmax(axis = var_3888, x = mh_w_43_cast_fp16)[name = tensor("op_3977_cast_fp16")]; + tensor var_3978 = const()[name = tensor("op_3978"), val = tensor([1, 20, 64, -1])]; + tensor var_3979_cast_fp16 = reshape(shape = var_3978, x = value_43_cast_fp16)[name = tensor("op_3979_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3979_cast_fp16, y = var_3977_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_3982 = const()[name = tensor("op_3982"), val = tensor([1, 1280, 1, -1])]; + tensor input_169_cast_fp16 = reshape(shape = var_3982, x = attn_43_cast_fp16)[name = tensor("input_169_cast_fp16")]; + tensor var_3992_pad_type_0 = const()[name = tensor("op_3992_pad_type_0"), val = tensor("valid")]; + tensor var_3992_strides_0 = const()[name = tensor("op_3992_strides_0"), val = tensor([1, 1])]; + tensor var_3992_pad_0 = const()[name = tensor("op_3992_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3992_dilations_0 = const()[name = tensor("op_3992_dilations_0"), val = tensor([1, 1])]; + tensor var_3992_groups_0 = const()[name = tensor("op_3992_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287438336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288257600))), name = tensor("layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_21_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288257728)))]; + tensor var_3992_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3992_dilations_0, groups = var_3992_groups_0, pad = var_3992_pad_0, pad_type = var_3992_pad_type_0, strides = var_3992_strides_0, weight = layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = tensor("op_3992_cast_fp16")]; + tensor var_3998_pad_type_0 = const()[name = tensor("op_3998_pad_type_0"), val = tensor("valid")]; + tensor var_3998_strides_0 = const()[name = tensor("op_3998_strides_0"), val = tensor([1, 1])]; + tensor var_3998_pad_0 = const()[name = tensor("op_3998_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3998_dilations_0 = const()[name = tensor("op_3998_dilations_0"), val = tensor([1, 1])]; + tensor var_3998_groups_0 = const()[name = tensor("op_3998_groups_0"), val = tensor(1)]; + tensor layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288272320))), name = tensor("layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288260352))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_3998_cast_fp16 = conv(dilations = var_3998_dilations_0, groups = var_3998_groups_0, pad = var_3998_pad_0, pad_type = var_3998_pad_type_0, strides = var_3998_strides_0, weight = layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_169_cast_fp16)[name = tensor("op_3998_cast_fp16")]; + tensor obj_87_cast_fp16 = add(x = var_3992_cast_fp16, y = var_3998_cast_fp16)[name = tensor("obj_87_cast_fp16")]; + tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_87_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; + tensor out_87_axes_0 = const()[name = tensor("out_87_axes_0"), val = tensor([1])]; + tensor var_4009_to_fp16 = const()[name = tensor("op_4009_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_87_cast_fp16 = layer_norm(axes = out_87_axes_0, epsilon = var_4009_to_fp16, x = inputs_87_cast_fp16)[name = tensor("out_87_cast_fp16")]; + tensor input_171_gamma_0_to_fp16 = const()[name = tensor("input_171_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288477184)))]; + tensor input_171_beta_0_to_fp16 = const()[name = tensor("input_171_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288479808)))]; + tensor input_171_epsilon_0_to_fp16 = const()[name = tensor("input_171_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_171_cast_fp16 = batch_norm(beta = input_171_beta_0_to_fp16, epsilon = input_171_epsilon_0_to_fp16, gamma = input_171_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor var_4027_pad_type_0 = const()[name = tensor("op_4027_pad_type_0"), val = tensor("valid")]; + tensor var_4027_strides_0 = const()[name = tensor("op_4027_strides_0"), val = tensor([1, 1])]; + tensor var_4027_pad_0 = const()[name = tensor("op_4027_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4027_dilations_0 = const()[name = tensor("op_4027_dilations_0"), val = tensor([1, 1])]; + tensor var_4027_groups_0 = const()[name = tensor("op_4027_groups_0"), val = tensor(1)]; + tensor layers_21_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288482432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291759296))), name = tensor("layers_21_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_21_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291759424)))]; + tensor var_4027_cast_fp16 = conv(bias = layers_21_fc1_inlier_module_bias_to_fp16, dilations = var_4027_dilations_0, groups = var_4027_groups_0, pad = var_4027_pad_0, pad_type = var_4027_pad_type_0, strides = var_4027_strides_0, weight = layers_21_fc1_inlier_module_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = tensor("op_4027_cast_fp16")]; + tensor var_4033_pad_type_0 = const()[name = tensor("op_4033_pad_type_0"), val = tensor("valid")]; + tensor var_4033_strides_0 = const()[name = tensor("op_4033_strides_0"), val = tensor([1, 1])]; + tensor var_4033_pad_0 = const()[name = tensor("op_4033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4033_dilations_0 = const()[name = tensor("op_4033_dilations_0"), val = tensor([1, 1])]; + tensor var_4033_groups_0 = const()[name = tensor("op_4033_groups_0"), val = tensor(1)]; + tensor layers_21_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291830912))), name = tensor("layers_21_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291769728))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4033_cast_fp16 = conv(dilations = var_4033_dilations_0, groups = var_4033_groups_0, pad = var_4033_pad_0, pad_type = var_4033_pad_type_0, strides = var_4033_strides_0, weight = layers_21_fc1_outlier_module_weight_to_fp16_sparsified, x = input_171_cast_fp16)[name = tensor("op_4033_cast_fp16")]; + tensor input_173_cast_fp16 = add(x = var_4027_cast_fp16, y = var_4033_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor input_175_mode_0 = const()[name = tensor("input_175_mode_0"), val = tensor("EXACT")]; + tensor input_175_cast_fp16 = gelu(mode = input_175_mode_0, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; + tensor var_4044_pad_type_0 = const()[name = tensor("op_4044_pad_type_0"), val = tensor("valid")]; + tensor var_4044_strides_0 = const()[name = tensor("op_4044_strides_0"), val = tensor([1, 1])]; + tensor var_4044_pad_0 = const()[name = tensor("op_4044_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4044_dilations_0 = const()[name = tensor("op_4044_dilations_0"), val = tensor([1, 1])]; + tensor var_4044_groups_0 = const()[name = tensor("op_4044_groups_0"), val = tensor(1)]; + tensor layers_21_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(292650176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295927040))), name = tensor("layers_21_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_21_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295927168)))]; + tensor var_4044_cast_fp16 = conv(bias = layers_21_fc2_inlier_module_bias_to_fp16, dilations = var_4044_dilations_0, groups = var_4044_groups_0, pad = var_4044_pad_0, pad_type = var_4044_pad_type_0, strides = var_4044_strides_0, weight = layers_21_fc2_inlier_module_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = tensor("op_4044_cast_fp16")]; + tensor var_4050_pad_type_0 = const()[name = tensor("op_4050_pad_type_0"), val = tensor("valid")]; + tensor var_4050_strides_0 = const()[name = tensor("op_4050_strides_0"), val = tensor([1, 1])]; + tensor var_4050_pad_0 = const()[name = tensor("op_4050_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4050_dilations_0 = const()[name = tensor("op_4050_dilations_0"), val = tensor([1, 1])]; + tensor var_4050_groups_0 = const()[name = tensor("op_4050_groups_0"), val = tensor(1)]; + tensor layers_21_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295992512))), name = tensor("layers_21_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295929792))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4050_cast_fp16 = conv(dilations = var_4050_dilations_0, groups = var_4050_groups_0, pad = var_4050_pad_0, pad_type = var_4050_pad_type_0, strides = var_4050_strides_0, weight = layers_21_fc2_outlier_module_weight_to_fp16_sparsified, x = input_175_cast_fp16)[name = tensor("op_4050_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = add(x = var_4044_cast_fp16, y = var_4050_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; + tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = hidden_states_47_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; + tensor var_4060 = const()[name = tensor("op_4060"), val = tensor(3)]; + tensor out_89_axes_0 = const()[name = tensor("out_89_axes_0"), val = tensor([1])]; + tensor var_4079_to_fp16 = const()[name = tensor("op_4079_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_89_cast_fp16 = layer_norm(axes = out_89_axes_0, epsilon = var_4079_to_fp16, x = inputs_89_cast_fp16)[name = tensor("out_89_cast_fp16")]; + tensor obj_89_gamma_0_to_fp16 = const()[name = tensor("obj_89_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296811776)))]; + tensor obj_89_beta_0_to_fp16 = const()[name = tensor("obj_89_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296814400)))]; + tensor obj_89_epsilon_0_to_fp16 = const()[name = tensor("obj_89_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_89_cast_fp16 = batch_norm(beta = obj_89_beta_0_to_fp16, epsilon = obj_89_epsilon_0_to_fp16, gamma = obj_89_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = tensor("obj_89_cast_fp16")]; + tensor var_4101_pad_type_0 = const()[name = tensor("op_4101_pad_type_0"), val = tensor("valid")]; + tensor var_4101_strides_0 = const()[name = tensor("op_4101_strides_0"), val = tensor([1, 1])]; + tensor var_4101_pad_0 = const()[name = tensor("op_4101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4101_dilations_0 = const()[name = tensor("op_4101_dilations_0"), val = tensor([1, 1])]; + tensor var_4101_groups_0 = const()[name = tensor("op_4101_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296817024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297636288))), name = tensor("layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_22_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297636416)))]; + tensor var_4101_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4101_dilations_0, groups = var_4101_groups_0, pad = var_4101_pad_0, pad_type = var_4101_pad_type_0, strides = var_4101_strides_0, weight = layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_4101_cast_fp16")]; + tensor var_4107_pad_type_0 = const()[name = tensor("op_4107_pad_type_0"), val = tensor("valid")]; + tensor var_4107_strides_0 = const()[name = tensor("op_4107_strides_0"), val = tensor([1, 1])]; + tensor var_4107_pad_0 = const()[name = tensor("op_4107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4107_dilations_0 = const()[name = tensor("op_4107_dilations_0"), val = tensor([1, 1])]; + tensor var_4107_groups_0 = const()[name = tensor("op_4107_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297674560))), name = tensor("layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297639040))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4107_cast_fp16 = conv(dilations = var_4107_dilations_0, groups = var_4107_groups_0, pad = var_4107_pad_0, pad_type = var_4107_pad_type_0, strides = var_4107_strides_0, weight = layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_4107_cast_fp16")]; + tensor query_45_cast_fp16 = add(x = var_4101_cast_fp16, y = var_4107_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_4116_pad_type_0 = const()[name = tensor("op_4116_pad_type_0"), val = tensor("valid")]; + tensor var_4116_strides_0 = const()[name = tensor("op_4116_strides_0"), val = tensor([1, 1])]; + tensor var_4116_pad_0 = const()[name = tensor("op_4116_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4116_dilations_0 = const()[name = tensor("op_4116_dilations_0"), val = tensor([1, 1])]; + tensor var_4116_groups_0 = const()[name = tensor("op_4116_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297879424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298698688))), name = tensor("layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4116_cast_fp16 = conv(dilations = var_4116_dilations_0, groups = var_4116_groups_0, pad = var_4116_pad_0, pad_type = var_4116_pad_type_0, strides = var_4116_strides_0, weight = layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_4116_cast_fp16")]; + tensor var_4122_pad_type_0 = const()[name = tensor("op_4122_pad_type_0"), val = tensor("valid")]; + tensor var_4122_strides_0 = const()[name = tensor("op_4122_strides_0"), val = tensor([1, 1])]; + tensor var_4122_pad_0 = const()[name = tensor("op_4122_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4122_dilations_0 = const()[name = tensor("op_4122_dilations_0"), val = tensor([1, 1])]; + tensor var_4122_groups_0 = const()[name = tensor("op_4122_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298728768))), name = tensor("layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298698816))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4122_cast_fp16 = conv(dilations = var_4122_dilations_0, groups = var_4122_groups_0, pad = var_4122_pad_0, pad_type = var_4122_pad_type_0, strides = var_4122_strides_0, weight = layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_4122_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = var_4116_cast_fp16, y = var_4122_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_4132_pad_type_0 = const()[name = tensor("op_4132_pad_type_0"), val = tensor("valid")]; + tensor var_4132_strides_0 = const()[name = tensor("op_4132_strides_0"), val = tensor([1, 1])]; + tensor var_4132_pad_0 = const()[name = tensor("op_4132_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4132_dilations_0 = const()[name = tensor("op_4132_dilations_0"), val = tensor([1, 1])]; + tensor var_4132_groups_0 = const()[name = tensor("op_4132_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298933632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299752896))), name = tensor("layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_22_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299753024)))]; + tensor var_4132_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4132_dilations_0, groups = var_4132_groups_0, pad = var_4132_pad_0, pad_type = var_4132_pad_type_0, strides = var_4132_strides_0, weight = layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_4132_cast_fp16")]; + tensor var_4138_pad_type_0 = const()[name = tensor("op_4138_pad_type_0"), val = tensor("valid")]; + tensor var_4138_strides_0 = const()[name = tensor("op_4138_strides_0"), val = tensor([1, 1])]; + tensor var_4138_pad_0 = const()[name = tensor("op_4138_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4138_dilations_0 = const()[name = tensor("op_4138_dilations_0"), val = tensor([1, 1])]; + tensor var_4138_groups_0 = const()[name = tensor("op_4138_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299768064))), name = tensor("layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299755648))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4138_cast_fp16 = conv(dilations = var_4138_dilations_0, groups = var_4138_groups_0, pad = var_4138_pad_0, pad_type = var_4138_pad_type_0, strides = var_4138_strides_0, weight = layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_4138_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_4132_cast_fp16, y = var_4138_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_4141 = const()[name = tensor("op_4141"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_4141, x = query_45_cast_fp16)[name = tensor("mh_q_45_cast_fp16")]; + tensor var_4143_to_fp16 = const()[name = tensor("op_4143_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4144_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_4143_to_fp16)[name = tensor("op_4144_cast_fp16")]; + tensor var_4145 = const()[name = tensor("op_4145"), val = tensor([1, 20, 64, -1])]; + tensor var_4146_cast_fp16 = reshape(shape = var_4145, x = key_45_cast_fp16)[name = tensor("op_4146_cast_fp16")]; + tensor mh_w_45_transpose_x_0 = const()[name = tensor("mh_w_45_transpose_x_0"), val = tensor(true)]; + tensor mh_w_45_transpose_y_0 = const()[name = tensor("mh_w_45_transpose_y_0"), val = tensor(false)]; + tensor mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_4144_cast_fp16, y = var_4146_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_4149_cast_fp16 = softmax(axis = var_4060, x = mh_w_45_cast_fp16)[name = tensor("op_4149_cast_fp16")]; + tensor var_4150 = const()[name = tensor("op_4150"), val = tensor([1, 20, 64, -1])]; + tensor var_4151_cast_fp16 = reshape(shape = var_4150, x = value_45_cast_fp16)[name = tensor("op_4151_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_4151_cast_fp16, y = var_4149_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_4154 = const()[name = tensor("op_4154"), val = tensor([1, 1280, 1, -1])]; + tensor input_177_cast_fp16 = reshape(shape = var_4154, x = attn_45_cast_fp16)[name = tensor("input_177_cast_fp16")]; + tensor var_4164_pad_type_0 = const()[name = tensor("op_4164_pad_type_0"), val = tensor("valid")]; + tensor var_4164_strides_0 = const()[name = tensor("op_4164_strides_0"), val = tensor([1, 1])]; + tensor var_4164_pad_0 = const()[name = tensor("op_4164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4164_dilations_0 = const()[name = tensor("op_4164_dilations_0"), val = tensor([1, 1])]; + tensor var_4164_groups_0 = const()[name = tensor("op_4164_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299972928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300792192))), name = tensor("layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_22_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300792320)))]; + tensor var_4164_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4164_dilations_0, groups = var_4164_groups_0, pad = var_4164_pad_0, pad_type = var_4164_pad_type_0, strides = var_4164_strides_0, weight = layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_177_cast_fp16)[name = tensor("op_4164_cast_fp16")]; + tensor var_4170_pad_type_0 = const()[name = tensor("op_4170_pad_type_0"), val = tensor("valid")]; + tensor var_4170_strides_0 = const()[name = tensor("op_4170_strides_0"), val = tensor([1, 1])]; + tensor var_4170_pad_0 = const()[name = tensor("op_4170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4170_dilations_0 = const()[name = tensor("op_4170_dilations_0"), val = tensor([1, 1])]; + tensor var_4170_groups_0 = const()[name = tensor("op_4170_groups_0"), val = tensor(1)]; + tensor layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300807232))), name = tensor("layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300794944))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4170_cast_fp16 = conv(dilations = var_4170_dilations_0, groups = var_4170_groups_0, pad = var_4170_pad_0, pad_type = var_4170_pad_type_0, strides = var_4170_strides_0, weight = layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_177_cast_fp16)[name = tensor("op_4170_cast_fp16")]; + tensor obj_91_cast_fp16 = add(x = var_4164_cast_fp16, y = var_4170_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; + tensor out_91_axes_0 = const()[name = tensor("out_91_axes_0"), val = tensor([1])]; + tensor var_4181_to_fp16 = const()[name = tensor("op_4181_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_4181_to_fp16, x = inputs_91_cast_fp16)[name = tensor("out_91_cast_fp16")]; + tensor input_179_gamma_0_to_fp16 = const()[name = tensor("input_179_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301012096)))]; + tensor input_179_beta_0_to_fp16 = const()[name = tensor("input_179_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301014720)))]; + tensor input_179_epsilon_0_to_fp16 = const()[name = tensor("input_179_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_179_cast_fp16 = batch_norm(beta = input_179_beta_0_to_fp16, epsilon = input_179_epsilon_0_to_fp16, gamma = input_179_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_91_cast_fp16)[name = tensor("input_179_cast_fp16")]; + tensor var_4199_pad_type_0 = const()[name = tensor("op_4199_pad_type_0"), val = tensor("valid")]; + tensor var_4199_strides_0 = const()[name = tensor("op_4199_strides_0"), val = tensor([1, 1])]; + tensor var_4199_pad_0 = const()[name = tensor("op_4199_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4199_dilations_0 = const()[name = tensor("op_4199_dilations_0"), val = tensor([1, 1])]; + tensor var_4199_groups_0 = const()[name = tensor("op_4199_groups_0"), val = tensor(1)]; + tensor layers_22_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301017344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304294208))), name = tensor("layers_22_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_22_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304294336)))]; + tensor var_4199_cast_fp16 = conv(bias = layers_22_fc1_inlier_module_bias_to_fp16, dilations = var_4199_dilations_0, groups = var_4199_groups_0, pad = var_4199_pad_0, pad_type = var_4199_pad_type_0, strides = var_4199_strides_0, weight = layers_22_fc1_inlier_module_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = tensor("op_4199_cast_fp16")]; + tensor var_4205_pad_type_0 = const()[name = tensor("op_4205_pad_type_0"), val = tensor("valid")]; + tensor var_4205_strides_0 = const()[name = tensor("op_4205_strides_0"), val = tensor([1, 1])]; + tensor var_4205_pad_0 = const()[name = tensor("op_4205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4205_dilations_0 = const()[name = tensor("op_4205_dilations_0"), val = tensor([1, 1])]; + tensor var_4205_groups_0 = const()[name = tensor("op_4205_groups_0"), val = tensor(1)]; + tensor layers_22_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304372736))), name = tensor("layers_22_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304304640))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4205_cast_fp16 = conv(dilations = var_4205_dilations_0, groups = var_4205_groups_0, pad = var_4205_pad_0, pad_type = var_4205_pad_type_0, strides = var_4205_strides_0, weight = layers_22_fc1_outlier_module_weight_to_fp16_sparsified, x = input_179_cast_fp16)[name = tensor("op_4205_cast_fp16")]; + tensor input_181_cast_fp16 = add(x = var_4199_cast_fp16, y = var_4205_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor input_183_mode_0 = const()[name = tensor("input_183_mode_0"), val = tensor("EXACT")]; + tensor input_183_cast_fp16 = gelu(mode = input_183_mode_0, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; + tensor var_4216_pad_type_0 = const()[name = tensor("op_4216_pad_type_0"), val = tensor("valid")]; + tensor var_4216_strides_0 = const()[name = tensor("op_4216_strides_0"), val = tensor([1, 1])]; + tensor var_4216_pad_0 = const()[name = tensor("op_4216_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4216_dilations_0 = const()[name = tensor("op_4216_dilations_0"), val = tensor([1, 1])]; + tensor var_4216_groups_0 = const()[name = tensor("op_4216_groups_0"), val = tensor(1)]; + tensor layers_22_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(305192000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308468864))), name = tensor("layers_22_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_22_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308468992)))]; + tensor var_4216_cast_fp16 = conv(bias = layers_22_fc2_inlier_module_bias_to_fp16, dilations = var_4216_dilations_0, groups = var_4216_groups_0, pad = var_4216_pad_0, pad_type = var_4216_pad_type_0, strides = var_4216_strides_0, weight = layers_22_fc2_inlier_module_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = tensor("op_4216_cast_fp16")]; + tensor var_4222_pad_type_0 = const()[name = tensor("op_4222_pad_type_0"), val = tensor("valid")]; + tensor var_4222_strides_0 = const()[name = tensor("op_4222_strides_0"), val = tensor([1, 1])]; + tensor var_4222_pad_0 = const()[name = tensor("op_4222_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4222_dilations_0 = const()[name = tensor("op_4222_dilations_0"), val = tensor([1, 1])]; + tensor var_4222_groups_0 = const()[name = tensor("op_4222_groups_0"), val = tensor(1)]; + tensor layers_22_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308533376))), name = tensor("layers_22_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308471616))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4222_cast_fp16 = conv(dilations = var_4222_dilations_0, groups = var_4222_groups_0, pad = var_4222_pad_0, pad_type = var_4222_pad_type_0, strides = var_4222_strides_0, weight = layers_22_fc2_outlier_module_weight_to_fp16_sparsified, x = input_183_cast_fp16)[name = tensor("op_4222_cast_fp16")]; + tensor hidden_states_49_cast_fp16 = add(x = var_4216_cast_fp16, y = var_4222_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; + tensor var_4232 = const()[name = tensor("op_4232"), val = tensor(3)]; + tensor out_93_axes_0 = const()[name = tensor("out_93_axes_0"), val = tensor([1])]; + tensor var_4251_to_fp16 = const()[name = tensor("op_4251_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_93_cast_fp16 = layer_norm(axes = out_93_axes_0, epsilon = var_4251_to_fp16, x = inputs_93_cast_fp16)[name = tensor("out_93_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309352640)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309355264)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor var_4273_pad_type_0 = const()[name = tensor("op_4273_pad_type_0"), val = tensor("valid")]; + tensor var_4273_strides_0 = const()[name = tensor("op_4273_strides_0"), val = tensor([1, 1])]; + tensor var_4273_pad_0 = const()[name = tensor("op_4273_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4273_dilations_0 = const()[name = tensor("op_4273_dilations_0"), val = tensor([1, 1])]; + tensor var_4273_groups_0 = const()[name = tensor("op_4273_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309357888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310177152))), name = tensor("layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_23_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310177280)))]; + tensor var_4273_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4273_dilations_0, groups = var_4273_groups_0, pad = var_4273_pad_0, pad_type = var_4273_pad_type_0, strides = var_4273_strides_0, weight = layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_4273_cast_fp16")]; + tensor var_4279_pad_type_0 = const()[name = tensor("op_4279_pad_type_0"), val = tensor("valid")]; + tensor var_4279_strides_0 = const()[name = tensor("op_4279_strides_0"), val = tensor([1, 1])]; + tensor var_4279_pad_0 = const()[name = tensor("op_4279_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4279_dilations_0 = const()[name = tensor("op_4279_dilations_0"), val = tensor([1, 1])]; + tensor var_4279_groups_0 = const()[name = tensor("op_4279_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310208448))), name = tensor("layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310179904))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4279_cast_fp16 = conv(dilations = var_4279_dilations_0, groups = var_4279_groups_0, pad = var_4279_pad_0, pad_type = var_4279_pad_type_0, strides = var_4279_strides_0, weight = layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_4279_cast_fp16")]; + tensor query_47_cast_fp16 = add(x = var_4273_cast_fp16, y = var_4279_cast_fp16)[name = tensor("query_47_cast_fp16")]; + tensor var_4288_pad_type_0 = const()[name = tensor("op_4288_pad_type_0"), val = tensor("valid")]; + tensor var_4288_strides_0 = const()[name = tensor("op_4288_strides_0"), val = tensor([1, 1])]; + tensor var_4288_pad_0 = const()[name = tensor("op_4288_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4288_dilations_0 = const()[name = tensor("op_4288_dilations_0"), val = tensor([1, 1])]; + tensor var_4288_groups_0 = const()[name = tensor("op_4288_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310413312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311232576))), name = tensor("layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4288_cast_fp16 = conv(dilations = var_4288_dilations_0, groups = var_4288_groups_0, pad = var_4288_pad_0, pad_type = var_4288_pad_type_0, strides = var_4288_strides_0, weight = layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_4288_cast_fp16")]; + tensor var_4294_pad_type_0 = const()[name = tensor("op_4294_pad_type_0"), val = tensor("valid")]; + tensor var_4294_strides_0 = const()[name = tensor("op_4294_strides_0"), val = tensor([1, 1])]; + tensor var_4294_pad_0 = const()[name = tensor("op_4294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4294_dilations_0 = const()[name = tensor("op_4294_dilations_0"), val = tensor([1, 1])]; + tensor var_4294_groups_0 = const()[name = tensor("op_4294_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311257088))), name = tensor("layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311232704))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4294_cast_fp16 = conv(dilations = var_4294_dilations_0, groups = var_4294_groups_0, pad = var_4294_pad_0, pad_type = var_4294_pad_type_0, strides = var_4294_strides_0, weight = layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_4294_cast_fp16")]; + tensor key_47_cast_fp16 = add(x = var_4288_cast_fp16, y = var_4294_cast_fp16)[name = tensor("key_47_cast_fp16")]; + tensor var_4304_pad_type_0 = const()[name = tensor("op_4304_pad_type_0"), val = tensor("valid")]; + tensor var_4304_strides_0 = const()[name = tensor("op_4304_strides_0"), val = tensor([1, 1])]; + tensor var_4304_pad_0 = const()[name = tensor("op_4304_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4304_dilations_0 = const()[name = tensor("op_4304_dilations_0"), val = tensor([1, 1])]; + tensor var_4304_groups_0 = const()[name = tensor("op_4304_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311461952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312281216))), name = tensor("layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_23_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312281344)))]; + tensor var_4304_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4304_dilations_0, groups = var_4304_groups_0, pad = var_4304_pad_0, pad_type = var_4304_pad_type_0, strides = var_4304_strides_0, weight = layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_4304_cast_fp16")]; + tensor var_4310_pad_type_0 = const()[name = tensor("op_4310_pad_type_0"), val = tensor("valid")]; + tensor var_4310_strides_0 = const()[name = tensor("op_4310_strides_0"), val = tensor([1, 1])]; + tensor var_4310_pad_0 = const()[name = tensor("op_4310_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4310_dilations_0 = const()[name = tensor("op_4310_dilations_0"), val = tensor([1, 1])]; + tensor var_4310_groups_0 = const()[name = tensor("op_4310_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312297152))), name = tensor("layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312283968))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4310_cast_fp16 = conv(dilations = var_4310_dilations_0, groups = var_4310_groups_0, pad = var_4310_pad_0, pad_type = var_4310_pad_type_0, strides = var_4310_strides_0, weight = layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_4310_cast_fp16")]; + tensor value_47_cast_fp16 = add(x = var_4304_cast_fp16, y = var_4310_cast_fp16)[name = tensor("value_47_cast_fp16")]; + tensor var_4313 = const()[name = tensor("op_4313"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_47_cast_fp16 = reshape(shape = var_4313, x = query_47_cast_fp16)[name = tensor("mh_q_47_cast_fp16")]; + tensor var_4315_to_fp16 = const()[name = tensor("op_4315_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4316_cast_fp16 = mul(x = mh_q_47_cast_fp16, y = var_4315_to_fp16)[name = tensor("op_4316_cast_fp16")]; + tensor var_4317 = const()[name = tensor("op_4317"), val = tensor([1, 20, 64, -1])]; + tensor var_4318_cast_fp16 = reshape(shape = var_4317, x = key_47_cast_fp16)[name = tensor("op_4318_cast_fp16")]; + tensor mh_w_47_transpose_x_0 = const()[name = tensor("mh_w_47_transpose_x_0"), val = tensor(true)]; + tensor mh_w_47_transpose_y_0 = const()[name = tensor("mh_w_47_transpose_y_0"), val = tensor(false)]; + tensor mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_4316_cast_fp16, y = var_4318_cast_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor var_4321_cast_fp16 = softmax(axis = var_4232, x = mh_w_47_cast_fp16)[name = tensor("op_4321_cast_fp16")]; + tensor var_4322 = const()[name = tensor("op_4322"), val = tensor([1, 20, 64, -1])]; + tensor var_4323_cast_fp16 = reshape(shape = var_4322, x = value_47_cast_fp16)[name = tensor("op_4323_cast_fp16")]; + tensor attn_47_transpose_x_0 = const()[name = tensor("attn_47_transpose_x_0"), val = tensor(false)]; + tensor attn_47_transpose_y_0 = const()[name = tensor("attn_47_transpose_y_0"), val = tensor(true)]; + tensor attn_47_cast_fp16 = matmul(transpose_x = attn_47_transpose_x_0, transpose_y = attn_47_transpose_y_0, x = var_4323_cast_fp16, y = var_4321_cast_fp16)[name = tensor("attn_47_cast_fp16")]; + tensor var_4326 = const()[name = tensor("op_4326"), val = tensor([1, 1280, 1, -1])]; + tensor input_185_cast_fp16 = reshape(shape = var_4326, x = attn_47_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor var_4336_pad_type_0 = const()[name = tensor("op_4336_pad_type_0"), val = tensor("valid")]; + tensor var_4336_strides_0 = const()[name = tensor("op_4336_strides_0"), val = tensor([1, 1])]; + tensor var_4336_pad_0 = const()[name = tensor("op_4336_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4336_dilations_0 = const()[name = tensor("op_4336_dilations_0"), val = tensor([1, 1])]; + tensor var_4336_groups_0 = const()[name = tensor("op_4336_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312502016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313321280))), name = tensor("layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_23_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313321408)))]; + tensor var_4336_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4336_dilations_0, groups = var_4336_groups_0, pad = var_4336_pad_0, pad_type = var_4336_pad_type_0, strides = var_4336_strides_0, weight = layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = tensor("op_4336_cast_fp16")]; + tensor var_4342_pad_type_0 = const()[name = tensor("op_4342_pad_type_0"), val = tensor("valid")]; + tensor var_4342_strides_0 = const()[name = tensor("op_4342_strides_0"), val = tensor([1, 1])]; + tensor var_4342_pad_0 = const()[name = tensor("op_4342_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4342_dilations_0 = const()[name = tensor("op_4342_dilations_0"), val = tensor([1, 1])]; + tensor var_4342_groups_0 = const()[name = tensor("op_4342_groups_0"), val = tensor(1)]; + tensor layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313337280))), name = tensor("layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313324032))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4342_cast_fp16 = conv(dilations = var_4342_dilations_0, groups = var_4342_groups_0, pad = var_4342_pad_0, pad_type = var_4342_pad_type_0, strides = var_4342_strides_0, weight = layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_185_cast_fp16)[name = tensor("op_4342_cast_fp16")]; + tensor obj_95_cast_fp16 = add(x = var_4336_cast_fp16, y = var_4342_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_95_cast_fp16")]; + tensor out_95_axes_0 = const()[name = tensor("out_95_axes_0"), val = tensor([1])]; + tensor var_4353_to_fp16 = const()[name = tensor("op_4353_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_95_cast_fp16 = layer_norm(axes = out_95_axes_0, epsilon = var_4353_to_fp16, x = inputs_95_cast_fp16)[name = tensor("out_95_cast_fp16")]; + tensor input_187_gamma_0_to_fp16 = const()[name = tensor("input_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313542144)))]; + tensor input_187_beta_0_to_fp16 = const()[name = tensor("input_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313544768)))]; + tensor input_187_epsilon_0_to_fp16 = const()[name = tensor("input_187_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_187_cast_fp16 = batch_norm(beta = input_187_beta_0_to_fp16, epsilon = input_187_epsilon_0_to_fp16, gamma = input_187_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = tensor("input_187_cast_fp16")]; + tensor var_4371_pad_type_0 = const()[name = tensor("op_4371_pad_type_0"), val = tensor("valid")]; + tensor var_4371_strides_0 = const()[name = tensor("op_4371_strides_0"), val = tensor([1, 1])]; + tensor var_4371_pad_0 = const()[name = tensor("op_4371_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4371_dilations_0 = const()[name = tensor("op_4371_dilations_0"), val = tensor([1, 1])]; + tensor var_4371_groups_0 = const()[name = tensor("op_4371_groups_0"), val = tensor(1)]; + tensor layers_23_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313547392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316824256))), name = tensor("layers_23_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_23_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316824384)))]; + tensor var_4371_cast_fp16 = conv(bias = layers_23_fc1_inlier_module_bias_to_fp16, dilations = var_4371_dilations_0, groups = var_4371_groups_0, pad = var_4371_pad_0, pad_type = var_4371_pad_type_0, strides = var_4371_strides_0, weight = layers_23_fc1_inlier_module_weight_to_fp16_palettized, x = input_187_cast_fp16)[name = tensor("op_4371_cast_fp16")]; + tensor var_4377_pad_type_0 = const()[name = tensor("op_4377_pad_type_0"), val = tensor("valid")]; + tensor var_4377_strides_0 = const()[name = tensor("op_4377_strides_0"), val = tensor([1, 1])]; + tensor var_4377_pad_0 = const()[name = tensor("op_4377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4377_dilations_0 = const()[name = tensor("op_4377_dilations_0"), val = tensor([1, 1])]; + tensor var_4377_groups_0 = const()[name = tensor("op_4377_groups_0"), val = tensor(1)]; + tensor layers_23_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316896960))), name = tensor("layers_23_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316834688))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4377_cast_fp16 = conv(dilations = var_4377_dilations_0, groups = var_4377_groups_0, pad = var_4377_pad_0, pad_type = var_4377_pad_type_0, strides = var_4377_strides_0, weight = layers_23_fc1_outlier_module_weight_to_fp16_sparsified, x = input_187_cast_fp16)[name = tensor("op_4377_cast_fp16")]; + tensor input_189_cast_fp16 = add(x = var_4371_cast_fp16, y = var_4377_cast_fp16)[name = tensor("input_189_cast_fp16")]; + tensor input_191_mode_0 = const()[name = tensor("input_191_mode_0"), val = tensor("EXACT")]; + tensor input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor("input_191_cast_fp16")]; + tensor var_4388_pad_type_0 = const()[name = tensor("op_4388_pad_type_0"), val = tensor("valid")]; + tensor var_4388_strides_0 = const()[name = tensor("op_4388_strides_0"), val = tensor([1, 1])]; + tensor var_4388_pad_0 = const()[name = tensor("op_4388_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4388_dilations_0 = const()[name = tensor("op_4388_dilations_0"), val = tensor([1, 1])]; + tensor var_4388_groups_0 = const()[name = tensor("op_4388_groups_0"), val = tensor(1)]; + tensor layers_23_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317716224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320993088))), name = tensor("layers_23_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_23_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320993216)))]; + tensor var_4388_cast_fp16 = conv(bias = layers_23_fc2_inlier_module_bias_to_fp16, dilations = var_4388_dilations_0, groups = var_4388_groups_0, pad = var_4388_pad_0, pad_type = var_4388_pad_type_0, strides = var_4388_strides_0, weight = layers_23_fc2_inlier_module_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = tensor("op_4388_cast_fp16")]; + tensor var_4394_pad_type_0 = const()[name = tensor("op_4394_pad_type_0"), val = tensor("valid")]; + tensor var_4394_strides_0 = const()[name = tensor("op_4394_strides_0"), val = tensor([1, 1])]; + tensor var_4394_pad_0 = const()[name = tensor("op_4394_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4394_dilations_0 = const()[name = tensor("op_4394_dilations_0"), val = tensor([1, 1])]; + tensor var_4394_groups_0 = const()[name = tensor("op_4394_groups_0"), val = tensor(1)]; + tensor layers_23_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321052864))), name = tensor("layers_23_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320995840))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4394_cast_fp16 = conv(dilations = var_4394_dilations_0, groups = var_4394_groups_0, pad = var_4394_pad_0, pad_type = var_4394_pad_type_0, strides = var_4394_strides_0, weight = layers_23_fc2_outlier_module_weight_to_fp16_sparsified, x = input_191_cast_fp16)[name = tensor("op_4394_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = var_4388_cast_fp16, y = var_4394_cast_fp16)[name = tensor("hidden_states_51_cast_fp16")]; + tensor inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_51_cast_fp16)[name = tensor("inputs_97_cast_fp16")]; + tensor var_4404 = const()[name = tensor("op_4404"), val = tensor(3)]; + tensor out_97_axes_0 = const()[name = tensor("out_97_axes_0"), val = tensor([1])]; + tensor var_4423_to_fp16 = const()[name = tensor("op_4423_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_4423_to_fp16, x = inputs_97_cast_fp16)[name = tensor("out_97_cast_fp16")]; + tensor obj_97_gamma_0_to_fp16 = const()[name = tensor("obj_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321872128)))]; + tensor obj_97_beta_0_to_fp16 = const()[name = tensor("obj_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321874752)))]; + tensor obj_97_epsilon_0_to_fp16 = const()[name = tensor("obj_97_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_97_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_4445_pad_type_0 = const()[name = tensor("op_4445_pad_type_0"), val = tensor("valid")]; + tensor var_4445_strides_0 = const()[name = tensor("op_4445_strides_0"), val = tensor([1, 1])]; + tensor var_4445_pad_0 = const()[name = tensor("op_4445_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4445_dilations_0 = const()[name = tensor("op_4445_dilations_0"), val = tensor([1, 1])]; + tensor var_4445_groups_0 = const()[name = tensor("op_4445_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321877376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322696640))), name = tensor("layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_24_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322696768)))]; + tensor var_4445_cast_fp16 = conv(bias = layers_24_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4445_dilations_0, groups = var_4445_groups_0, pad = var_4445_pad_0, pad_type = var_4445_pad_type_0, strides = var_4445_strides_0, weight = layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_4445_cast_fp16")]; + tensor var_4451_pad_type_0 = const()[name = tensor("op_4451_pad_type_0"), val = tensor("valid")]; + tensor var_4451_strides_0 = const()[name = tensor("op_4451_strides_0"), val = tensor([1, 1])]; + tensor var_4451_pad_0 = const()[name = tensor("op_4451_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4451_dilations_0 = const()[name = tensor("op_4451_dilations_0"), val = tensor([1, 1])]; + tensor var_4451_groups_0 = const()[name = tensor("op_4451_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322733056))), name = tensor("layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322699392))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4451_cast_fp16 = conv(dilations = var_4451_dilations_0, groups = var_4451_groups_0, pad = var_4451_pad_0, pad_type = var_4451_pad_type_0, strides = var_4451_strides_0, weight = layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_4451_cast_fp16")]; + tensor query_49_cast_fp16 = add(x = var_4445_cast_fp16, y = var_4451_cast_fp16)[name = tensor("query_49_cast_fp16")]; + tensor var_4460_pad_type_0 = const()[name = tensor("op_4460_pad_type_0"), val = tensor("valid")]; + tensor var_4460_strides_0 = const()[name = tensor("op_4460_strides_0"), val = tensor([1, 1])]; + tensor var_4460_pad_0 = const()[name = tensor("op_4460_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4460_dilations_0 = const()[name = tensor("op_4460_dilations_0"), val = tensor([1, 1])]; + tensor var_4460_groups_0 = const()[name = tensor("op_4460_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322937920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323757184))), name = tensor("layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4460_cast_fp16 = conv(dilations = var_4460_dilations_0, groups = var_4460_groups_0, pad = var_4460_pad_0, pad_type = var_4460_pad_type_0, strides = var_4460_strides_0, weight = layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_4460_cast_fp16")]; + tensor var_4466_pad_type_0 = const()[name = tensor("op_4466_pad_type_0"), val = tensor("valid")]; + tensor var_4466_strides_0 = const()[name = tensor("op_4466_strides_0"), val = tensor([1, 1])]; + tensor var_4466_pad_0 = const()[name = tensor("op_4466_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4466_dilations_0 = const()[name = tensor("op_4466_dilations_0"), val = tensor([1, 1])]; + tensor var_4466_groups_0 = const()[name = tensor("op_4466_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323783744))), name = tensor("layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323757312))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4466_cast_fp16 = conv(dilations = var_4466_dilations_0, groups = var_4466_groups_0, pad = var_4466_pad_0, pad_type = var_4466_pad_type_0, strides = var_4466_strides_0, weight = layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_4466_cast_fp16")]; + tensor key_49_cast_fp16 = add(x = var_4460_cast_fp16, y = var_4466_cast_fp16)[name = tensor("key_49_cast_fp16")]; + tensor var_4476_pad_type_0 = const()[name = tensor("op_4476_pad_type_0"), val = tensor("valid")]; + tensor var_4476_strides_0 = const()[name = tensor("op_4476_strides_0"), val = tensor([1, 1])]; + tensor var_4476_pad_0 = const()[name = tensor("op_4476_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4476_dilations_0 = const()[name = tensor("op_4476_dilations_0"), val = tensor([1, 1])]; + tensor var_4476_groups_0 = const()[name = tensor("op_4476_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323988608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324807872))), name = tensor("layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_24_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324808000)))]; + tensor var_4476_cast_fp16 = conv(bias = layers_24_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4476_dilations_0, groups = var_4476_groups_0, pad = var_4476_pad_0, pad_type = var_4476_pad_type_0, strides = var_4476_strides_0, weight = layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_4476_cast_fp16")]; + tensor var_4482_pad_type_0 = const()[name = tensor("op_4482_pad_type_0"), val = tensor("valid")]; + tensor var_4482_strides_0 = const()[name = tensor("op_4482_strides_0"), val = tensor([1, 1])]; + tensor var_4482_pad_0 = const()[name = tensor("op_4482_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4482_dilations_0 = const()[name = tensor("op_4482_dilations_0"), val = tensor([1, 1])]; + tensor var_4482_groups_0 = const()[name = tensor("op_4482_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324822080))), name = tensor("layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324810624))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4482_cast_fp16 = conv(dilations = var_4482_dilations_0, groups = var_4482_groups_0, pad = var_4482_pad_0, pad_type = var_4482_pad_type_0, strides = var_4482_strides_0, weight = layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_4482_cast_fp16")]; + tensor value_49_cast_fp16 = add(x = var_4476_cast_fp16, y = var_4482_cast_fp16)[name = tensor("value_49_cast_fp16")]; + tensor var_4485 = const()[name = tensor("op_4485"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_49_cast_fp16 = reshape(shape = var_4485, x = query_49_cast_fp16)[name = tensor("mh_q_49_cast_fp16")]; + tensor var_4487_to_fp16 = const()[name = tensor("op_4487_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4488_cast_fp16 = mul(x = mh_q_49_cast_fp16, y = var_4487_to_fp16)[name = tensor("op_4488_cast_fp16")]; + tensor var_4489 = const()[name = tensor("op_4489"), val = tensor([1, 20, 64, -1])]; + tensor var_4490_cast_fp16 = reshape(shape = var_4489, x = key_49_cast_fp16)[name = tensor("op_4490_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_4488_cast_fp16, y = var_4490_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor var_4493_cast_fp16 = softmax(axis = var_4404, x = mh_w_49_cast_fp16)[name = tensor("op_4493_cast_fp16")]; + tensor var_4494 = const()[name = tensor("op_4494"), val = tensor([1, 20, 64, -1])]; + tensor var_4495_cast_fp16 = reshape(shape = var_4494, x = value_49_cast_fp16)[name = tensor("op_4495_cast_fp16")]; + tensor attn_49_transpose_x_0 = const()[name = tensor("attn_49_transpose_x_0"), val = tensor(false)]; + tensor attn_49_transpose_y_0 = const()[name = tensor("attn_49_transpose_y_0"), val = tensor(true)]; + tensor attn_49_cast_fp16 = matmul(transpose_x = attn_49_transpose_x_0, transpose_y = attn_49_transpose_y_0, x = var_4495_cast_fp16, y = var_4493_cast_fp16)[name = tensor("attn_49_cast_fp16")]; + tensor var_4498 = const()[name = tensor("op_4498"), val = tensor([1, 1280, 1, -1])]; + tensor input_193_cast_fp16 = reshape(shape = var_4498, x = attn_49_cast_fp16)[name = tensor("input_193_cast_fp16")]; + tensor var_4508_pad_type_0 = const()[name = tensor("op_4508_pad_type_0"), val = tensor("valid")]; + tensor var_4508_strides_0 = const()[name = tensor("op_4508_strides_0"), val = tensor([1, 1])]; + tensor var_4508_pad_0 = const()[name = tensor("op_4508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4508_dilations_0 = const()[name = tensor("op_4508_dilations_0"), val = tensor([1, 1])]; + tensor var_4508_groups_0 = const()[name = tensor("op_4508_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325026944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325846208))), name = tensor("layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_24_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325846336)))]; + tensor var_4508_cast_fp16 = conv(bias = layers_24_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4508_dilations_0, groups = var_4508_groups_0, pad = var_4508_pad_0, pad_type = var_4508_pad_type_0, strides = var_4508_strides_0, weight = layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = tensor("op_4508_cast_fp16")]; + tensor var_4514_pad_type_0 = const()[name = tensor("op_4514_pad_type_0"), val = tensor("valid")]; + tensor var_4514_strides_0 = const()[name = tensor("op_4514_strides_0"), val = tensor([1, 1])]; + tensor var_4514_pad_0 = const()[name = tensor("op_4514_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4514_dilations_0 = const()[name = tensor("op_4514_dilations_0"), val = tensor([1, 1])]; + tensor var_4514_groups_0 = const()[name = tensor("op_4514_groups_0"), val = tensor(1)]; + tensor layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325859712))), name = tensor("layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325848960))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4514_cast_fp16 = conv(dilations = var_4514_dilations_0, groups = var_4514_groups_0, pad = var_4514_pad_0, pad_type = var_4514_pad_type_0, strides = var_4514_strides_0, weight = layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_193_cast_fp16)[name = tensor("op_4514_cast_fp16")]; + tensor obj_99_cast_fp16 = add(x = var_4508_cast_fp16, y = var_4514_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = obj_99_cast_fp16)[name = tensor("inputs_99_cast_fp16")]; + tensor out_99_axes_0 = const()[name = tensor("out_99_axes_0"), val = tensor([1])]; + tensor var_4525_to_fp16 = const()[name = tensor("op_4525_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_99_cast_fp16 = layer_norm(axes = out_99_axes_0, epsilon = var_4525_to_fp16, x = inputs_99_cast_fp16)[name = tensor("out_99_cast_fp16")]; + tensor input_195_gamma_0_to_fp16 = const()[name = tensor("input_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326064576)))]; + tensor input_195_beta_0_to_fp16 = const()[name = tensor("input_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326067200)))]; + tensor input_195_epsilon_0_to_fp16 = const()[name = tensor("input_195_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_195_cast_fp16 = batch_norm(beta = input_195_beta_0_to_fp16, epsilon = input_195_epsilon_0_to_fp16, gamma = input_195_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_99_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor var_4543_pad_type_0 = const()[name = tensor("op_4543_pad_type_0"), val = tensor("valid")]; + tensor var_4543_strides_0 = const()[name = tensor("op_4543_strides_0"), val = tensor([1, 1])]; + tensor var_4543_pad_0 = const()[name = tensor("op_4543_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4543_dilations_0 = const()[name = tensor("op_4543_dilations_0"), val = tensor([1, 1])]; + tensor var_4543_groups_0 = const()[name = tensor("op_4543_groups_0"), val = tensor(1)]; + tensor layers_24_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326069824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329346688))), name = tensor("layers_24_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_24_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329346816)))]; + tensor var_4543_cast_fp16 = conv(bias = layers_24_fc1_inlier_module_bias_to_fp16, dilations = var_4543_dilations_0, groups = var_4543_groups_0, pad = var_4543_pad_0, pad_type = var_4543_pad_type_0, strides = var_4543_strides_0, weight = layers_24_fc1_inlier_module_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = tensor("op_4543_cast_fp16")]; + tensor var_4549_pad_type_0 = const()[name = tensor("op_4549_pad_type_0"), val = tensor("valid")]; + tensor var_4549_strides_0 = const()[name = tensor("op_4549_strides_0"), val = tensor([1, 1])]; + tensor var_4549_pad_0 = const()[name = tensor("op_4549_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4549_dilations_0 = const()[name = tensor("op_4549_dilations_0"), val = tensor([1, 1])]; + tensor var_4549_groups_0 = const()[name = tensor("op_4549_groups_0"), val = tensor(1)]; + tensor layers_24_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329417984))), name = tensor("layers_24_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329357120))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4549_cast_fp16 = conv(dilations = var_4549_dilations_0, groups = var_4549_groups_0, pad = var_4549_pad_0, pad_type = var_4549_pad_type_0, strides = var_4549_strides_0, weight = layers_24_fc1_outlier_module_weight_to_fp16_sparsified, x = input_195_cast_fp16)[name = tensor("op_4549_cast_fp16")]; + tensor input_197_cast_fp16 = add(x = var_4543_cast_fp16, y = var_4549_cast_fp16)[name = tensor("input_197_cast_fp16")]; + tensor input_199_mode_0 = const()[name = tensor("input_199_mode_0"), val = tensor("EXACT")]; + tensor input_199_cast_fp16 = gelu(mode = input_199_mode_0, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor var_4560_pad_type_0 = const()[name = tensor("op_4560_pad_type_0"), val = tensor("valid")]; + tensor var_4560_strides_0 = const()[name = tensor("op_4560_strides_0"), val = tensor([1, 1])]; + tensor var_4560_pad_0 = const()[name = tensor("op_4560_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4560_dilations_0 = const()[name = tensor("op_4560_dilations_0"), val = tensor([1, 1])]; + tensor var_4560_groups_0 = const()[name = tensor("op_4560_groups_0"), val = tensor(1)]; + tensor layers_24_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330237248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333514112))), name = tensor("layers_24_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_24_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333514240)))]; + tensor var_4560_cast_fp16 = conv(bias = layers_24_fc2_inlier_module_bias_to_fp16, dilations = var_4560_dilations_0, groups = var_4560_groups_0, pad = var_4560_pad_0, pad_type = var_4560_pad_type_0, strides = var_4560_strides_0, weight = layers_24_fc2_inlier_module_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = tensor("op_4560_cast_fp16")]; + tensor var_4566_pad_type_0 = const()[name = tensor("op_4566_pad_type_0"), val = tensor("valid")]; + tensor var_4566_strides_0 = const()[name = tensor("op_4566_strides_0"), val = tensor([1, 1])]; + tensor var_4566_pad_0 = const()[name = tensor("op_4566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4566_dilations_0 = const()[name = tensor("op_4566_dilations_0"), val = tensor([1, 1])]; + tensor var_4566_groups_0 = const()[name = tensor("op_4566_groups_0"), val = tensor(1)]; + tensor layers_24_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333569216))), name = tensor("layers_24_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333516864))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4566_cast_fp16 = conv(dilations = var_4566_dilations_0, groups = var_4566_groups_0, pad = var_4566_pad_0, pad_type = var_4566_pad_type_0, strides = var_4566_strides_0, weight = layers_24_fc2_outlier_module_weight_to_fp16_sparsified, x = input_199_cast_fp16)[name = tensor("op_4566_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = var_4560_cast_fp16, y = var_4566_cast_fp16)[name = tensor("hidden_states_53_cast_fp16")]; + tensor inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = hidden_states_53_cast_fp16)[name = tensor("inputs_101_cast_fp16")]; + tensor var_4576 = const()[name = tensor("op_4576"), val = tensor(3)]; + tensor out_101_axes_0 = const()[name = tensor("out_101_axes_0"), val = tensor([1])]; + tensor var_4595_to_fp16 = const()[name = tensor("op_4595_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_101_cast_fp16 = layer_norm(axes = out_101_axes_0, epsilon = var_4595_to_fp16, x = inputs_101_cast_fp16)[name = tensor("out_101_cast_fp16")]; + tensor obj_101_gamma_0_to_fp16 = const()[name = tensor("obj_101_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334388480)))]; + tensor obj_101_beta_0_to_fp16 = const()[name = tensor("obj_101_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334391104)))]; + tensor obj_101_epsilon_0_to_fp16 = const()[name = tensor("obj_101_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_101_cast_fp16 = batch_norm(beta = obj_101_beta_0_to_fp16, epsilon = obj_101_epsilon_0_to_fp16, gamma = obj_101_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_101_cast_fp16)[name = tensor("obj_101_cast_fp16")]; + tensor var_4617_pad_type_0 = const()[name = tensor("op_4617_pad_type_0"), val = tensor("valid")]; + tensor var_4617_strides_0 = const()[name = tensor("op_4617_strides_0"), val = tensor([1, 1])]; + tensor var_4617_pad_0 = const()[name = tensor("op_4617_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4617_dilations_0 = const()[name = tensor("op_4617_dilations_0"), val = tensor([1, 1])]; + tensor var_4617_groups_0 = const()[name = tensor("op_4617_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334393728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335212992))), name = tensor("layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_25_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335213120)))]; + tensor var_4617_cast_fp16 = conv(bias = layers_25_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4617_dilations_0, groups = var_4617_groups_0, pad = var_4617_pad_0, pad_type = var_4617_pad_type_0, strides = var_4617_strides_0, weight = layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_4617_cast_fp16")]; + tensor var_4623_pad_type_0 = const()[name = tensor("op_4623_pad_type_0"), val = tensor("valid")]; + tensor var_4623_strides_0 = const()[name = tensor("op_4623_strides_0"), val = tensor([1, 1])]; + tensor var_4623_pad_0 = const()[name = tensor("op_4623_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4623_dilations_0 = const()[name = tensor("op_4623_dilations_0"), val = tensor([1, 1])]; + tensor var_4623_groups_0 = const()[name = tensor("op_4623_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335253376))), name = tensor("layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335215744))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4623_cast_fp16 = conv(dilations = var_4623_dilations_0, groups = var_4623_groups_0, pad = var_4623_pad_0, pad_type = var_4623_pad_type_0, strides = var_4623_strides_0, weight = layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_4623_cast_fp16")]; + tensor query_51_cast_fp16 = add(x = var_4617_cast_fp16, y = var_4623_cast_fp16)[name = tensor("query_51_cast_fp16")]; + tensor var_4632_pad_type_0 = const()[name = tensor("op_4632_pad_type_0"), val = tensor("valid")]; + tensor var_4632_strides_0 = const()[name = tensor("op_4632_strides_0"), val = tensor([1, 1])]; + tensor var_4632_pad_0 = const()[name = tensor("op_4632_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4632_dilations_0 = const()[name = tensor("op_4632_dilations_0"), val = tensor([1, 1])]; + tensor var_4632_groups_0 = const()[name = tensor("op_4632_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335458240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336277504))), name = tensor("layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4632_cast_fp16 = conv(dilations = var_4632_dilations_0, groups = var_4632_groups_0, pad = var_4632_pad_0, pad_type = var_4632_pad_type_0, strides = var_4632_strides_0, weight = layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_4632_cast_fp16")]; + tensor var_4638_pad_type_0 = const()[name = tensor("op_4638_pad_type_0"), val = tensor("valid")]; + tensor var_4638_strides_0 = const()[name = tensor("op_4638_strides_0"), val = tensor([1, 1])]; + tensor var_4638_pad_0 = const()[name = tensor("op_4638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4638_dilations_0 = const()[name = tensor("op_4638_dilations_0"), val = tensor([1, 1])]; + tensor var_4638_groups_0 = const()[name = tensor("op_4638_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336302912))), name = tensor("layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336277632))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4638_cast_fp16 = conv(dilations = var_4638_dilations_0, groups = var_4638_groups_0, pad = var_4638_pad_0, pad_type = var_4638_pad_type_0, strides = var_4638_strides_0, weight = layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_4638_cast_fp16")]; + tensor key_51_cast_fp16 = add(x = var_4632_cast_fp16, y = var_4638_cast_fp16)[name = tensor("key_51_cast_fp16")]; + tensor var_4648_pad_type_0 = const()[name = tensor("op_4648_pad_type_0"), val = tensor("valid")]; + tensor var_4648_strides_0 = const()[name = tensor("op_4648_strides_0"), val = tensor([1, 1])]; + tensor var_4648_pad_0 = const()[name = tensor("op_4648_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4648_dilations_0 = const()[name = tensor("op_4648_dilations_0"), val = tensor([1, 1])]; + tensor var_4648_groups_0 = const()[name = tensor("op_4648_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336507776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337327040))), name = tensor("layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_25_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337327168)))]; + tensor var_4648_cast_fp16 = conv(bias = layers_25_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4648_dilations_0, groups = var_4648_groups_0, pad = var_4648_pad_0, pad_type = var_4648_pad_type_0, strides = var_4648_strides_0, weight = layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_4648_cast_fp16")]; + tensor var_4654_pad_type_0 = const()[name = tensor("op_4654_pad_type_0"), val = tensor("valid")]; + tensor var_4654_strides_0 = const()[name = tensor("op_4654_strides_0"), val = tensor([1, 1])]; + tensor var_4654_pad_0 = const()[name = tensor("op_4654_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4654_dilations_0 = const()[name = tensor("op_4654_dilations_0"), val = tensor([1, 1])]; + tensor var_4654_groups_0 = const()[name = tensor("op_4654_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337341760))), name = tensor("layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337329792))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4654_cast_fp16 = conv(dilations = var_4654_dilations_0, groups = var_4654_groups_0, pad = var_4654_pad_0, pad_type = var_4654_pad_type_0, strides = var_4654_strides_0, weight = layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_4654_cast_fp16")]; + tensor value_51_cast_fp16 = add(x = var_4648_cast_fp16, y = var_4654_cast_fp16)[name = tensor("value_51_cast_fp16")]; + tensor var_4657 = const()[name = tensor("op_4657"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_51_cast_fp16 = reshape(shape = var_4657, x = query_51_cast_fp16)[name = tensor("mh_q_51_cast_fp16")]; + tensor var_4659_to_fp16 = const()[name = tensor("op_4659_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4660_cast_fp16 = mul(x = mh_q_51_cast_fp16, y = var_4659_to_fp16)[name = tensor("op_4660_cast_fp16")]; + tensor var_4661 = const()[name = tensor("op_4661"), val = tensor([1, 20, 64, -1])]; + tensor var_4662_cast_fp16 = reshape(shape = var_4661, x = key_51_cast_fp16)[name = tensor("op_4662_cast_fp16")]; + tensor mh_w_51_transpose_x_0 = const()[name = tensor("mh_w_51_transpose_x_0"), val = tensor(true)]; + tensor mh_w_51_transpose_y_0 = const()[name = tensor("mh_w_51_transpose_y_0"), val = tensor(false)]; + tensor mh_w_51_cast_fp16 = matmul(transpose_x = mh_w_51_transpose_x_0, transpose_y = mh_w_51_transpose_y_0, x = var_4660_cast_fp16, y = var_4662_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor var_4665_cast_fp16 = softmax(axis = var_4576, x = mh_w_51_cast_fp16)[name = tensor("op_4665_cast_fp16")]; + tensor var_4666 = const()[name = tensor("op_4666"), val = tensor([1, 20, 64, -1])]; + tensor var_4667_cast_fp16 = reshape(shape = var_4666, x = value_51_cast_fp16)[name = tensor("op_4667_cast_fp16")]; + tensor attn_51_transpose_x_0 = const()[name = tensor("attn_51_transpose_x_0"), val = tensor(false)]; + tensor attn_51_transpose_y_0 = const()[name = tensor("attn_51_transpose_y_0"), val = tensor(true)]; + tensor attn_51_cast_fp16 = matmul(transpose_x = attn_51_transpose_x_0, transpose_y = attn_51_transpose_y_0, x = var_4667_cast_fp16, y = var_4665_cast_fp16)[name = tensor("attn_51_cast_fp16")]; + tensor var_4670 = const()[name = tensor("op_4670"), val = tensor([1, 1280, 1, -1])]; + tensor input_201_cast_fp16 = reshape(shape = var_4670, x = attn_51_cast_fp16)[name = tensor("input_201_cast_fp16")]; + tensor var_4680_pad_type_0 = const()[name = tensor("op_4680_pad_type_0"), val = tensor("valid")]; + tensor var_4680_strides_0 = const()[name = tensor("op_4680_strides_0"), val = tensor([1, 1])]; + tensor var_4680_pad_0 = const()[name = tensor("op_4680_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4680_dilations_0 = const()[name = tensor("op_4680_dilations_0"), val = tensor([1, 1])]; + tensor var_4680_groups_0 = const()[name = tensor("op_4680_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337546624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338365888))), name = tensor("layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_25_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338366016)))]; + tensor var_4680_cast_fp16 = conv(bias = layers_25_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4680_dilations_0, groups = var_4680_groups_0, pad = var_4680_pad_0, pad_type = var_4680_pad_type_0, strides = var_4680_strides_0, weight = layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = tensor("op_4680_cast_fp16")]; + tensor var_4686_pad_type_0 = const()[name = tensor("op_4686_pad_type_0"), val = tensor("valid")]; + tensor var_4686_strides_0 = const()[name = tensor("op_4686_strides_0"), val = tensor([1, 1])]; + tensor var_4686_pad_0 = const()[name = tensor("op_4686_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4686_dilations_0 = const()[name = tensor("op_4686_dilations_0"), val = tensor([1, 1])]; + tensor var_4686_groups_0 = const()[name = tensor("op_4686_groups_0"), val = tensor(1)]; + tensor layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338379584))), name = tensor("layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338368640))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4686_cast_fp16 = conv(dilations = var_4686_dilations_0, groups = var_4686_groups_0, pad = var_4686_pad_0, pad_type = var_4686_pad_type_0, strides = var_4686_strides_0, weight = layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_201_cast_fp16)[name = tensor("op_4686_cast_fp16")]; + tensor obj_103_cast_fp16 = add(x = var_4680_cast_fp16, y = var_4686_cast_fp16)[name = tensor("obj_103_cast_fp16")]; + tensor inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = obj_103_cast_fp16)[name = tensor("inputs_103_cast_fp16")]; + tensor out_103_axes_0 = const()[name = tensor("out_103_axes_0"), val = tensor([1])]; + tensor var_4697_to_fp16 = const()[name = tensor("op_4697_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_4697_to_fp16, x = inputs_103_cast_fp16)[name = tensor("out_103_cast_fp16")]; + tensor input_203_gamma_0_to_fp16 = const()[name = tensor("input_203_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338584448)))]; + tensor input_203_beta_0_to_fp16 = const()[name = tensor("input_203_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338587072)))]; + tensor input_203_epsilon_0_to_fp16 = const()[name = tensor("input_203_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_203_cast_fp16 = batch_norm(beta = input_203_beta_0_to_fp16, epsilon = input_203_epsilon_0_to_fp16, gamma = input_203_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_103_cast_fp16)[name = tensor("input_203_cast_fp16")]; + tensor var_4715_pad_type_0 = const()[name = tensor("op_4715_pad_type_0"), val = tensor("valid")]; + tensor var_4715_strides_0 = const()[name = tensor("op_4715_strides_0"), val = tensor([1, 1])]; + tensor var_4715_pad_0 = const()[name = tensor("op_4715_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4715_dilations_0 = const()[name = tensor("op_4715_dilations_0"), val = tensor([1, 1])]; + tensor var_4715_groups_0 = const()[name = tensor("op_4715_groups_0"), val = tensor(1)]; + tensor layers_25_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338589696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341866560))), name = tensor("layers_25_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_25_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341866688)))]; + tensor var_4715_cast_fp16 = conv(bias = layers_25_fc1_inlier_module_bias_to_fp16, dilations = var_4715_dilations_0, groups = var_4715_groups_0, pad = var_4715_pad_0, pad_type = var_4715_pad_type_0, strides = var_4715_strides_0, weight = layers_25_fc1_inlier_module_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = tensor("op_4715_cast_fp16")]; + tensor var_4721_pad_type_0 = const()[name = tensor("op_4721_pad_type_0"), val = tensor("valid")]; + tensor var_4721_strides_0 = const()[name = tensor("op_4721_strides_0"), val = tensor([1, 1])]; + tensor var_4721_pad_0 = const()[name = tensor("op_4721_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4721_dilations_0 = const()[name = tensor("op_4721_dilations_0"), val = tensor([1, 1])]; + tensor var_4721_groups_0 = const()[name = tensor("op_4721_groups_0"), val = tensor(1)]; + tensor layers_25_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341933760))), name = tensor("layers_25_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341876992))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4721_cast_fp16 = conv(dilations = var_4721_dilations_0, groups = var_4721_groups_0, pad = var_4721_pad_0, pad_type = var_4721_pad_type_0, strides = var_4721_strides_0, weight = layers_25_fc1_outlier_module_weight_to_fp16_sparsified, x = input_203_cast_fp16)[name = tensor("op_4721_cast_fp16")]; + tensor input_205_cast_fp16 = add(x = var_4715_cast_fp16, y = var_4721_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor input_207_mode_0 = const()[name = tensor("input_207_mode_0"), val = tensor("EXACT")]; + tensor input_207_cast_fp16 = gelu(mode = input_207_mode_0, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor var_4732_pad_type_0 = const()[name = tensor("op_4732_pad_type_0"), val = tensor("valid")]; + tensor var_4732_strides_0 = const()[name = tensor("op_4732_strides_0"), val = tensor([1, 1])]; + tensor var_4732_pad_0 = const()[name = tensor("op_4732_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4732_dilations_0 = const()[name = tensor("op_4732_dilations_0"), val = tensor([1, 1])]; + tensor var_4732_groups_0 = const()[name = tensor("op_4732_groups_0"), val = tensor(1)]; + tensor layers_25_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342753024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346029888))), name = tensor("layers_25_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_25_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346030016)))]; + tensor var_4732_cast_fp16 = conv(bias = layers_25_fc2_inlier_module_bias_to_fp16, dilations = var_4732_dilations_0, groups = var_4732_groups_0, pad = var_4732_pad_0, pad_type = var_4732_pad_type_0, strides = var_4732_strides_0, weight = layers_25_fc2_inlier_module_weight_to_fp16_palettized, x = input_207_cast_fp16)[name = tensor("op_4732_cast_fp16")]; + tensor var_4738_pad_type_0 = const()[name = tensor("op_4738_pad_type_0"), val = tensor("valid")]; + tensor var_4738_strides_0 = const()[name = tensor("op_4738_strides_0"), val = tensor([1, 1])]; + tensor var_4738_pad_0 = const()[name = tensor("op_4738_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4738_dilations_0 = const()[name = tensor("op_4738_dilations_0"), val = tensor([1, 1])]; + tensor var_4738_groups_0 = const()[name = tensor("op_4738_groups_0"), val = tensor(1)]; + tensor layers_25_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346081152))), name = tensor("layers_25_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346032640))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4738_cast_fp16 = conv(dilations = var_4738_dilations_0, groups = var_4738_groups_0, pad = var_4738_pad_0, pad_type = var_4738_pad_type_0, strides = var_4738_strides_0, weight = layers_25_fc2_outlier_module_weight_to_fp16_sparsified, x = input_207_cast_fp16)[name = tensor("op_4738_cast_fp16")]; + tensor hidden_states_55_cast_fp16 = add(x = var_4732_cast_fp16, y = var_4738_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; + tensor inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = hidden_states_55_cast_fp16)[name = tensor("inputs_105_cast_fp16")]; + tensor var_4748 = const()[name = tensor("op_4748"), val = tensor(3)]; + tensor out_105_axes_0 = const()[name = tensor("out_105_axes_0"), val = tensor([1])]; + tensor var_4767_to_fp16 = const()[name = tensor("op_4767_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_105_cast_fp16 = layer_norm(axes = out_105_axes_0, epsilon = var_4767_to_fp16, x = inputs_105_cast_fp16)[name = tensor("out_105_cast_fp16")]; + tensor obj_105_gamma_0_to_fp16 = const()[name = tensor("obj_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346900416)))]; + tensor obj_105_beta_0_to_fp16 = const()[name = tensor("obj_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346903040)))]; + tensor obj_105_epsilon_0_to_fp16 = const()[name = tensor("obj_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_105_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor var_4789_pad_type_0 = const()[name = tensor("op_4789_pad_type_0"), val = tensor("valid")]; + tensor var_4789_strides_0 = const()[name = tensor("op_4789_strides_0"), val = tensor([1, 1])]; + tensor var_4789_pad_0 = const()[name = tensor("op_4789_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4789_dilations_0 = const()[name = tensor("op_4789_dilations_0"), val = tensor([1, 1])]; + tensor var_4789_groups_0 = const()[name = tensor("op_4789_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346905664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347724928))), name = tensor("layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_26_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347725056)))]; + tensor var_4789_cast_fp16 = conv(bias = layers_26_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4789_dilations_0, groups = var_4789_groups_0, pad = var_4789_pad_0, pad_type = var_4789_pad_type_0, strides = var_4789_strides_0, weight = layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_4789_cast_fp16")]; + tensor var_4795_pad_type_0 = const()[name = tensor("op_4795_pad_type_0"), val = tensor("valid")]; + tensor var_4795_strides_0 = const()[name = tensor("op_4795_strides_0"), val = tensor([1, 1])]; + tensor var_4795_pad_0 = const()[name = tensor("op_4795_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4795_dilations_0 = const()[name = tensor("op_4795_dilations_0"), val = tensor([1, 1])]; + tensor var_4795_groups_0 = const()[name = tensor("op_4795_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347761152))), name = tensor("layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347727680))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4795_cast_fp16 = conv(dilations = var_4795_dilations_0, groups = var_4795_groups_0, pad = var_4795_pad_0, pad_type = var_4795_pad_type_0, strides = var_4795_strides_0, weight = layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_4795_cast_fp16")]; + tensor query_53_cast_fp16 = add(x = var_4789_cast_fp16, y = var_4795_cast_fp16)[name = tensor("query_53_cast_fp16")]; + tensor var_4804_pad_type_0 = const()[name = tensor("op_4804_pad_type_0"), val = tensor("valid")]; + tensor var_4804_strides_0 = const()[name = tensor("op_4804_strides_0"), val = tensor([1, 1])]; + tensor var_4804_pad_0 = const()[name = tensor("op_4804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4804_dilations_0 = const()[name = tensor("op_4804_dilations_0"), val = tensor([1, 1])]; + tensor var_4804_groups_0 = const()[name = tensor("op_4804_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347966016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348785280))), name = tensor("layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4804_cast_fp16 = conv(dilations = var_4804_dilations_0, groups = var_4804_groups_0, pad = var_4804_pad_0, pad_type = var_4804_pad_type_0, strides = var_4804_strides_0, weight = layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_4804_cast_fp16")]; + tensor var_4810_pad_type_0 = const()[name = tensor("op_4810_pad_type_0"), val = tensor("valid")]; + tensor var_4810_strides_0 = const()[name = tensor("op_4810_strides_0"), val = tensor([1, 1])]; + tensor var_4810_pad_0 = const()[name = tensor("op_4810_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4810_dilations_0 = const()[name = tensor("op_4810_dilations_0"), val = tensor([1, 1])]; + tensor var_4810_groups_0 = const()[name = tensor("op_4810_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348811904))), name = tensor("layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348785408))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4810_cast_fp16 = conv(dilations = var_4810_dilations_0, groups = var_4810_groups_0, pad = var_4810_pad_0, pad_type = var_4810_pad_type_0, strides = var_4810_strides_0, weight = layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_4810_cast_fp16")]; + tensor key_53_cast_fp16 = add(x = var_4804_cast_fp16, y = var_4810_cast_fp16)[name = tensor("key_53_cast_fp16")]; + tensor var_4820_pad_type_0 = const()[name = tensor("op_4820_pad_type_0"), val = tensor("valid")]; + tensor var_4820_strides_0 = const()[name = tensor("op_4820_strides_0"), val = tensor([1, 1])]; + tensor var_4820_pad_0 = const()[name = tensor("op_4820_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4820_dilations_0 = const()[name = tensor("op_4820_dilations_0"), val = tensor([1, 1])]; + tensor var_4820_groups_0 = const()[name = tensor("op_4820_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349016768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349836032))), name = tensor("layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_26_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349836160)))]; + tensor var_4820_cast_fp16 = conv(bias = layers_26_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4820_dilations_0, groups = var_4820_groups_0, pad = var_4820_pad_0, pad_type = var_4820_pad_type_0, strides = var_4820_strides_0, weight = layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_4820_cast_fp16")]; + tensor var_4826_pad_type_0 = const()[name = tensor("op_4826_pad_type_0"), val = tensor("valid")]; + tensor var_4826_strides_0 = const()[name = tensor("op_4826_strides_0"), val = tensor([1, 1])]; + tensor var_4826_pad_0 = const()[name = tensor("op_4826_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4826_dilations_0 = const()[name = tensor("op_4826_dilations_0"), val = tensor([1, 1])]; + tensor var_4826_groups_0 = const()[name = tensor("op_4826_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349851264))), name = tensor("layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349838784))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4826_cast_fp16 = conv(dilations = var_4826_dilations_0, groups = var_4826_groups_0, pad = var_4826_pad_0, pad_type = var_4826_pad_type_0, strides = var_4826_strides_0, weight = layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_4826_cast_fp16")]; + tensor value_53_cast_fp16 = add(x = var_4820_cast_fp16, y = var_4826_cast_fp16)[name = tensor("value_53_cast_fp16")]; + tensor var_4829 = const()[name = tensor("op_4829"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_53_cast_fp16 = reshape(shape = var_4829, x = query_53_cast_fp16)[name = tensor("mh_q_53_cast_fp16")]; + tensor var_4831_to_fp16 = const()[name = tensor("op_4831_to_fp16"), val = tensor(0x1p-3)]; + tensor var_4832_cast_fp16 = mul(x = mh_q_53_cast_fp16, y = var_4831_to_fp16)[name = tensor("op_4832_cast_fp16")]; + tensor var_4833 = const()[name = tensor("op_4833"), val = tensor([1, 20, 64, -1])]; + tensor var_4834_cast_fp16 = reshape(shape = var_4833, x = key_53_cast_fp16)[name = tensor("op_4834_cast_fp16")]; + tensor mh_w_53_transpose_x_0 = const()[name = tensor("mh_w_53_transpose_x_0"), val = tensor(true)]; + tensor mh_w_53_transpose_y_0 = const()[name = tensor("mh_w_53_transpose_y_0"), val = tensor(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_4832_cast_fp16, y = var_4834_cast_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor var_4837_cast_fp16 = softmax(axis = var_4748, x = mh_w_53_cast_fp16)[name = tensor("op_4837_cast_fp16")]; + tensor var_4838 = const()[name = tensor("op_4838"), val = tensor([1, 20, 64, -1])]; + tensor var_4839_cast_fp16 = reshape(shape = var_4838, x = value_53_cast_fp16)[name = tensor("op_4839_cast_fp16")]; + tensor attn_53_transpose_x_0 = const()[name = tensor("attn_53_transpose_x_0"), val = tensor(false)]; + tensor attn_53_transpose_y_0 = const()[name = tensor("attn_53_transpose_y_0"), val = tensor(true)]; + tensor attn_53_cast_fp16 = matmul(transpose_x = attn_53_transpose_x_0, transpose_y = attn_53_transpose_y_0, x = var_4839_cast_fp16, y = var_4837_cast_fp16)[name = tensor("attn_53_cast_fp16")]; + tensor var_4842 = const()[name = tensor("op_4842"), val = tensor([1, 1280, 1, -1])]; + tensor input_209_cast_fp16 = reshape(shape = var_4842, x = attn_53_cast_fp16)[name = tensor("input_209_cast_fp16")]; + tensor var_4852_pad_type_0 = const()[name = tensor("op_4852_pad_type_0"), val = tensor("valid")]; + tensor var_4852_strides_0 = const()[name = tensor("op_4852_strides_0"), val = tensor([1, 1])]; + tensor var_4852_pad_0 = const()[name = tensor("op_4852_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4852_dilations_0 = const()[name = tensor("op_4852_dilations_0"), val = tensor([1, 1])]; + tensor var_4852_groups_0 = const()[name = tensor("op_4852_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350056128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350875392))), name = tensor("layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_26_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350875520)))]; + tensor var_4852_cast_fp16 = conv(bias = layers_26_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4852_dilations_0, groups = var_4852_groups_0, pad = var_4852_pad_0, pad_type = var_4852_pad_type_0, strides = var_4852_strides_0, weight = layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = tensor("op_4852_cast_fp16")]; + tensor var_4858_pad_type_0 = const()[name = tensor("op_4858_pad_type_0"), val = tensor("valid")]; + tensor var_4858_strides_0 = const()[name = tensor("op_4858_strides_0"), val = tensor([1, 1])]; + tensor var_4858_pad_0 = const()[name = tensor("op_4858_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4858_dilations_0 = const()[name = tensor("op_4858_dilations_0"), val = tensor([1, 1])]; + tensor var_4858_groups_0 = const()[name = tensor("op_4858_groups_0"), val = tensor(1)]; + tensor layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350889984))), name = tensor("layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350878144))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4858_cast_fp16 = conv(dilations = var_4858_dilations_0, groups = var_4858_groups_0, pad = var_4858_pad_0, pad_type = var_4858_pad_type_0, strides = var_4858_strides_0, weight = layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_209_cast_fp16)[name = tensor("op_4858_cast_fp16")]; + tensor obj_107_cast_fp16 = add(x = var_4852_cast_fp16, y = var_4858_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = obj_107_cast_fp16)[name = tensor("inputs_107_cast_fp16")]; + tensor out_107_axes_0 = const()[name = tensor("out_107_axes_0"), val = tensor([1])]; + tensor var_4869_to_fp16 = const()[name = tensor("op_4869_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_107_cast_fp16 = layer_norm(axes = out_107_axes_0, epsilon = var_4869_to_fp16, x = inputs_107_cast_fp16)[name = tensor("out_107_cast_fp16")]; + tensor input_211_gamma_0_to_fp16 = const()[name = tensor("input_211_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351094848)))]; + tensor input_211_beta_0_to_fp16 = const()[name = tensor("input_211_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351097472)))]; + tensor input_211_epsilon_0_to_fp16 = const()[name = tensor("input_211_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_211_cast_fp16 = batch_norm(beta = input_211_beta_0_to_fp16, epsilon = input_211_epsilon_0_to_fp16, gamma = input_211_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_107_cast_fp16)[name = tensor("input_211_cast_fp16")]; + tensor var_4887_pad_type_0 = const()[name = tensor("op_4887_pad_type_0"), val = tensor("valid")]; + tensor var_4887_strides_0 = const()[name = tensor("op_4887_strides_0"), val = tensor([1, 1])]; + tensor var_4887_pad_0 = const()[name = tensor("op_4887_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4887_dilations_0 = const()[name = tensor("op_4887_dilations_0"), val = tensor([1, 1])]; + tensor var_4887_groups_0 = const()[name = tensor("op_4887_groups_0"), val = tensor(1)]; + tensor layers_26_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351100096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354376960))), name = tensor("layers_26_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_26_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354377088)))]; + tensor var_4887_cast_fp16 = conv(bias = layers_26_fc1_inlier_module_bias_to_fp16, dilations = var_4887_dilations_0, groups = var_4887_groups_0, pad = var_4887_pad_0, pad_type = var_4887_pad_type_0, strides = var_4887_strides_0, weight = layers_26_fc1_inlier_module_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = tensor("op_4887_cast_fp16")]; + tensor var_4893_pad_type_0 = const()[name = tensor("op_4893_pad_type_0"), val = tensor("valid")]; + tensor var_4893_strides_0 = const()[name = tensor("op_4893_strides_0"), val = tensor([1, 1])]; + tensor var_4893_pad_0 = const()[name = tensor("op_4893_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4893_dilations_0 = const()[name = tensor("op_4893_dilations_0"), val = tensor([1, 1])]; + tensor var_4893_groups_0 = const()[name = tensor("op_4893_groups_0"), val = tensor(1)]; + tensor layers_26_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354440128))), name = tensor("layers_26_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354387392))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_4893_cast_fp16 = conv(dilations = var_4893_dilations_0, groups = var_4893_groups_0, pad = var_4893_pad_0, pad_type = var_4893_pad_type_0, strides = var_4893_strides_0, weight = layers_26_fc1_outlier_module_weight_to_fp16_sparsified, x = input_211_cast_fp16)[name = tensor("op_4893_cast_fp16")]; + tensor input_213_cast_fp16 = add(x = var_4887_cast_fp16, y = var_4893_cast_fp16)[name = tensor("input_213_cast_fp16")]; + tensor input_215_mode_0 = const()[name = tensor("input_215_mode_0"), val = tensor("EXACT")]; + tensor input_215_cast_fp16 = gelu(mode = input_215_mode_0, x = input_213_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor var_4904_pad_type_0 = const()[name = tensor("op_4904_pad_type_0"), val = tensor("valid")]; + tensor var_4904_strides_0 = const()[name = tensor("op_4904_strides_0"), val = tensor([1, 1])]; + tensor var_4904_pad_0 = const()[name = tensor("op_4904_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4904_dilations_0 = const()[name = tensor("op_4904_dilations_0"), val = tensor([1, 1])]; + tensor var_4904_groups_0 = const()[name = tensor("op_4904_groups_0"), val = tensor(1)]; + tensor layers_26_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355259392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358536256))), name = tensor("layers_26_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_26_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358536384)))]; + tensor var_4904_cast_fp16 = conv(bias = layers_26_fc2_inlier_module_bias_to_fp16, dilations = var_4904_dilations_0, groups = var_4904_groups_0, pad = var_4904_pad_0, pad_type = var_4904_pad_type_0, strides = var_4904_strides_0, weight = layers_26_fc2_inlier_module_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = tensor("op_4904_cast_fp16")]; + tensor var_4910_pad_type_0 = const()[name = tensor("op_4910_pad_type_0"), val = tensor("valid")]; + tensor var_4910_strides_0 = const()[name = tensor("op_4910_strides_0"), val = tensor([1, 1])]; + tensor var_4910_pad_0 = const()[name = tensor("op_4910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4910_dilations_0 = const()[name = tensor("op_4910_dilations_0"), val = tensor([1, 1])]; + tensor var_4910_groups_0 = const()[name = tensor("op_4910_groups_0"), val = tensor(1)]; + tensor layers_26_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358588352))), name = tensor("layers_26_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358539008))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_4910_cast_fp16 = conv(dilations = var_4910_dilations_0, groups = var_4910_groups_0, pad = var_4910_pad_0, pad_type = var_4910_pad_type_0, strides = var_4910_strides_0, weight = layers_26_fc2_outlier_module_weight_to_fp16_sparsified, x = input_215_cast_fp16)[name = tensor("op_4910_cast_fp16")]; + tensor hidden_states_57_cast_fp16 = add(x = var_4904_cast_fp16, y = var_4910_cast_fp16)[name = tensor("hidden_states_57_cast_fp16")]; + tensor inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = hidden_states_57_cast_fp16)[name = tensor("inputs_109_cast_fp16")]; + tensor var_4920 = const()[name = tensor("op_4920"), val = tensor(3)]; + tensor out_109_axes_0 = const()[name = tensor("out_109_axes_0"), val = tensor([1])]; + tensor var_4939_to_fp16 = const()[name = tensor("op_4939_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_4939_to_fp16, x = inputs_109_cast_fp16)[name = tensor("out_109_cast_fp16")]; + tensor obj_109_gamma_0_to_fp16 = const()[name = tensor("obj_109_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359407616)))]; + tensor obj_109_beta_0_to_fp16 = const()[name = tensor("obj_109_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359410240)))]; + tensor obj_109_epsilon_0_to_fp16 = const()[name = tensor("obj_109_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_109_cast_fp16 = batch_norm(beta = obj_109_beta_0_to_fp16, epsilon = obj_109_epsilon_0_to_fp16, gamma = obj_109_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_109_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor var_4961_pad_type_0 = const()[name = tensor("op_4961_pad_type_0"), val = tensor("valid")]; + tensor var_4961_strides_0 = const()[name = tensor("op_4961_strides_0"), val = tensor([1, 1])]; + tensor var_4961_pad_0 = const()[name = tensor("op_4961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4961_dilations_0 = const()[name = tensor("op_4961_dilations_0"), val = tensor([1, 1])]; + tensor var_4961_groups_0 = const()[name = tensor("op_4961_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359412864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360232128))), name = tensor("layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_27_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360232256)))]; + tensor var_4961_cast_fp16 = conv(bias = layers_27_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4961_dilations_0, groups = var_4961_groups_0, pad = var_4961_pad_0, pad_type = var_4961_pad_type_0, strides = var_4961_strides_0, weight = layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_4961_cast_fp16")]; + tensor var_4967_pad_type_0 = const()[name = tensor("op_4967_pad_type_0"), val = tensor("valid")]; + tensor var_4967_strides_0 = const()[name = tensor("op_4967_strides_0"), val = tensor([1, 1])]; + tensor var_4967_pad_0 = const()[name = tensor("op_4967_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4967_dilations_0 = const()[name = tensor("op_4967_dilations_0"), val = tensor([1, 1])]; + tensor var_4967_groups_0 = const()[name = tensor("op_4967_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360271616))), name = tensor("layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360234880))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4967_cast_fp16 = conv(dilations = var_4967_dilations_0, groups = var_4967_groups_0, pad = var_4967_pad_0, pad_type = var_4967_pad_type_0, strides = var_4967_strides_0, weight = layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_4967_cast_fp16")]; + tensor query_55_cast_fp16 = add(x = var_4961_cast_fp16, y = var_4967_cast_fp16)[name = tensor("query_55_cast_fp16")]; + tensor var_4976_pad_type_0 = const()[name = tensor("op_4976_pad_type_0"), val = tensor("valid")]; + tensor var_4976_strides_0 = const()[name = tensor("op_4976_strides_0"), val = tensor([1, 1])]; + tensor var_4976_pad_0 = const()[name = tensor("op_4976_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4976_dilations_0 = const()[name = tensor("op_4976_dilations_0"), val = tensor([1, 1])]; + tensor var_4976_groups_0 = const()[name = tensor("op_4976_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360476480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361295744))), name = tensor("layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4976_cast_fp16 = conv(dilations = var_4976_dilations_0, groups = var_4976_groups_0, pad = var_4976_pad_0, pad_type = var_4976_pad_type_0, strides = var_4976_strides_0, weight = layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_4976_cast_fp16")]; + tensor var_4982_pad_type_0 = const()[name = tensor("op_4982_pad_type_0"), val = tensor("valid")]; + tensor var_4982_strides_0 = const()[name = tensor("op_4982_strides_0"), val = tensor([1, 1])]; + tensor var_4982_pad_0 = const()[name = tensor("op_4982_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4982_dilations_0 = const()[name = tensor("op_4982_dilations_0"), val = tensor([1, 1])]; + tensor var_4982_groups_0 = const()[name = tensor("op_4982_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361321984))), name = tensor("layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361295872))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4982_cast_fp16 = conv(dilations = var_4982_dilations_0, groups = var_4982_groups_0, pad = var_4982_pad_0, pad_type = var_4982_pad_type_0, strides = var_4982_strides_0, weight = layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_4982_cast_fp16")]; + tensor key_55_cast_fp16 = add(x = var_4976_cast_fp16, y = var_4982_cast_fp16)[name = tensor("key_55_cast_fp16")]; + tensor var_4992_pad_type_0 = const()[name = tensor("op_4992_pad_type_0"), val = tensor("valid")]; + tensor var_4992_strides_0 = const()[name = tensor("op_4992_strides_0"), val = tensor([1, 1])]; + tensor var_4992_pad_0 = const()[name = tensor("op_4992_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4992_dilations_0 = const()[name = tensor("op_4992_dilations_0"), val = tensor([1, 1])]; + tensor var_4992_groups_0 = const()[name = tensor("op_4992_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361526848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362346112))), name = tensor("layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_27_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362346240)))]; + tensor var_4992_cast_fp16 = conv(bias = layers_27_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4992_dilations_0, groups = var_4992_groups_0, pad = var_4992_pad_0, pad_type = var_4992_pad_type_0, strides = var_4992_strides_0, weight = layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_4992_cast_fp16")]; + tensor var_4998_pad_type_0 = const()[name = tensor("op_4998_pad_type_0"), val = tensor("valid")]; + tensor var_4998_strides_0 = const()[name = tensor("op_4998_strides_0"), val = tensor([1, 1])]; + tensor var_4998_pad_0 = const()[name = tensor("op_4998_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4998_dilations_0 = const()[name = tensor("op_4998_dilations_0"), val = tensor([1, 1])]; + tensor var_4998_groups_0 = const()[name = tensor("op_4998_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362359552))), name = tensor("layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362348864))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_4998_cast_fp16 = conv(dilations = var_4998_dilations_0, groups = var_4998_groups_0, pad = var_4998_pad_0, pad_type = var_4998_pad_type_0, strides = var_4998_strides_0, weight = layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_4998_cast_fp16")]; + tensor value_55_cast_fp16 = add(x = var_4992_cast_fp16, y = var_4998_cast_fp16)[name = tensor("value_55_cast_fp16")]; + tensor var_5001 = const()[name = tensor("op_5001"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_55_cast_fp16 = reshape(shape = var_5001, x = query_55_cast_fp16)[name = tensor("mh_q_55_cast_fp16")]; + tensor var_5003_to_fp16 = const()[name = tensor("op_5003_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5004_cast_fp16 = mul(x = mh_q_55_cast_fp16, y = var_5003_to_fp16)[name = tensor("op_5004_cast_fp16")]; + tensor var_5005 = const()[name = tensor("op_5005"), val = tensor([1, 20, 64, -1])]; + tensor var_5006_cast_fp16 = reshape(shape = var_5005, x = key_55_cast_fp16)[name = tensor("op_5006_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_5004_cast_fp16, y = var_5006_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor var_5009_cast_fp16 = softmax(axis = var_4920, x = mh_w_55_cast_fp16)[name = tensor("op_5009_cast_fp16")]; + tensor var_5010 = const()[name = tensor("op_5010"), val = tensor([1, 20, 64, -1])]; + tensor var_5011_cast_fp16 = reshape(shape = var_5010, x = value_55_cast_fp16)[name = tensor("op_5011_cast_fp16")]; + tensor attn_55_transpose_x_0 = const()[name = tensor("attn_55_transpose_x_0"), val = tensor(false)]; + tensor attn_55_transpose_y_0 = const()[name = tensor("attn_55_transpose_y_0"), val = tensor(true)]; + tensor attn_55_cast_fp16 = matmul(transpose_x = attn_55_transpose_x_0, transpose_y = attn_55_transpose_y_0, x = var_5011_cast_fp16, y = var_5009_cast_fp16)[name = tensor("attn_55_cast_fp16")]; + tensor var_5014 = const()[name = tensor("op_5014"), val = tensor([1, 1280, 1, -1])]; + tensor input_217_cast_fp16 = reshape(shape = var_5014, x = attn_55_cast_fp16)[name = tensor("input_217_cast_fp16")]; + tensor var_5024_pad_type_0 = const()[name = tensor("op_5024_pad_type_0"), val = tensor("valid")]; + tensor var_5024_strides_0 = const()[name = tensor("op_5024_strides_0"), val = tensor([1, 1])]; + tensor var_5024_pad_0 = const()[name = tensor("op_5024_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5024_dilations_0 = const()[name = tensor("op_5024_dilations_0"), val = tensor([1, 1])]; + tensor var_5024_groups_0 = const()[name = tensor("op_5024_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362564416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363383680))), name = tensor("layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_27_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363383808)))]; + tensor var_5024_cast_fp16 = conv(bias = layers_27_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5024_dilations_0, groups = var_5024_groups_0, pad = var_5024_pad_0, pad_type = var_5024_pad_type_0, strides = var_5024_strides_0, weight = layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = tensor("op_5024_cast_fp16")]; + tensor var_5030_pad_type_0 = const()[name = tensor("op_5030_pad_type_0"), val = tensor("valid")]; + tensor var_5030_strides_0 = const()[name = tensor("op_5030_strides_0"), val = tensor([1, 1])]; + tensor var_5030_pad_0 = const()[name = tensor("op_5030_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5030_dilations_0 = const()[name = tensor("op_5030_dilations_0"), val = tensor([1, 1])]; + tensor var_5030_groups_0 = const()[name = tensor("op_5030_groups_0"), val = tensor(1)]; + tensor layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363396544))), name = tensor("layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363386432))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5030_cast_fp16 = conv(dilations = var_5030_dilations_0, groups = var_5030_groups_0, pad = var_5030_pad_0, pad_type = var_5030_pad_type_0, strides = var_5030_strides_0, weight = layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_217_cast_fp16)[name = tensor("op_5030_cast_fp16")]; + tensor obj_111_cast_fp16 = add(x = var_5024_cast_fp16, y = var_5030_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = obj_111_cast_fp16)[name = tensor("inputs_111_cast_fp16")]; + tensor out_111_axes_0 = const()[name = tensor("out_111_axes_0"), val = tensor([1])]; + tensor var_5041_to_fp16 = const()[name = tensor("op_5041_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_111_cast_fp16 = layer_norm(axes = out_111_axes_0, epsilon = var_5041_to_fp16, x = inputs_111_cast_fp16)[name = tensor("out_111_cast_fp16")]; + tensor input_219_gamma_0_to_fp16 = const()[name = tensor("input_219_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363601408)))]; + tensor input_219_beta_0_to_fp16 = const()[name = tensor("input_219_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363604032)))]; + tensor input_219_epsilon_0_to_fp16 = const()[name = tensor("input_219_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_219_cast_fp16 = batch_norm(beta = input_219_beta_0_to_fp16, epsilon = input_219_epsilon_0_to_fp16, gamma = input_219_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_111_cast_fp16)[name = tensor("input_219_cast_fp16")]; + tensor var_5059_pad_type_0 = const()[name = tensor("op_5059_pad_type_0"), val = tensor("valid")]; + tensor var_5059_strides_0 = const()[name = tensor("op_5059_strides_0"), val = tensor([1, 1])]; + tensor var_5059_pad_0 = const()[name = tensor("op_5059_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5059_dilations_0 = const()[name = tensor("op_5059_dilations_0"), val = tensor([1, 1])]; + tensor var_5059_groups_0 = const()[name = tensor("op_5059_groups_0"), val = tensor(1)]; + tensor layers_27_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363606656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366883520))), name = tensor("layers_27_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_27_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366883648)))]; + tensor var_5059_cast_fp16 = conv(bias = layers_27_fc1_inlier_module_bias_to_fp16, dilations = var_5059_dilations_0, groups = var_5059_groups_0, pad = var_5059_pad_0, pad_type = var_5059_pad_type_0, strides = var_5059_strides_0, weight = layers_27_fc1_inlier_module_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = tensor("op_5059_cast_fp16")]; + tensor var_5065_pad_type_0 = const()[name = tensor("op_5065_pad_type_0"), val = tensor("valid")]; + tensor var_5065_strides_0 = const()[name = tensor("op_5065_strides_0"), val = tensor([1, 1])]; + tensor var_5065_pad_0 = const()[name = tensor("op_5065_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5065_dilations_0 = const()[name = tensor("op_5065_dilations_0"), val = tensor([1, 1])]; + tensor var_5065_groups_0 = const()[name = tensor("op_5065_groups_0"), val = tensor(1)]; + tensor layers_27_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366948672))), name = tensor("layers_27_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366893952))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_5065_cast_fp16 = conv(dilations = var_5065_dilations_0, groups = var_5065_groups_0, pad = var_5065_pad_0, pad_type = var_5065_pad_type_0, strides = var_5065_strides_0, weight = layers_27_fc1_outlier_module_weight_to_fp16_sparsified, x = input_219_cast_fp16)[name = tensor("op_5065_cast_fp16")]; + tensor input_221_cast_fp16 = add(x = var_5059_cast_fp16, y = var_5065_cast_fp16)[name = tensor("input_221_cast_fp16")]; + tensor input_223_mode_0 = const()[name = tensor("input_223_mode_0"), val = tensor("EXACT")]; + tensor input_223_cast_fp16 = gelu(mode = input_223_mode_0, x = input_221_cast_fp16)[name = tensor("input_223_cast_fp16")]; + tensor var_5076_pad_type_0 = const()[name = tensor("op_5076_pad_type_0"), val = tensor("valid")]; + tensor var_5076_strides_0 = const()[name = tensor("op_5076_strides_0"), val = tensor([1, 1])]; + tensor var_5076_pad_0 = const()[name = tensor("op_5076_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5076_dilations_0 = const()[name = tensor("op_5076_dilations_0"), val = tensor([1, 1])]; + tensor var_5076_groups_0 = const()[name = tensor("op_5076_groups_0"), val = tensor(1)]; + tensor layers_27_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367767936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371044800))), name = tensor("layers_27_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_27_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371044928)))]; + tensor var_5076_cast_fp16 = conv(bias = layers_27_fc2_inlier_module_bias_to_fp16, dilations = var_5076_dilations_0, groups = var_5076_groups_0, pad = var_5076_pad_0, pad_type = var_5076_pad_type_0, strides = var_5076_strides_0, weight = layers_27_fc2_inlier_module_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = tensor("op_5076_cast_fp16")]; + tensor var_5082_pad_type_0 = const()[name = tensor("op_5082_pad_type_0"), val = tensor("valid")]; + tensor var_5082_strides_0 = const()[name = tensor("op_5082_strides_0"), val = tensor([1, 1])]; + tensor var_5082_pad_0 = const()[name = tensor("op_5082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5082_dilations_0 = const()[name = tensor("op_5082_dilations_0"), val = tensor([1, 1])]; + tensor var_5082_groups_0 = const()[name = tensor("op_5082_groups_0"), val = tensor(1)]; + tensor layers_27_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371098048))), name = tensor("layers_27_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371047552))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_5082_cast_fp16 = conv(dilations = var_5082_dilations_0, groups = var_5082_groups_0, pad = var_5082_pad_0, pad_type = var_5082_pad_type_0, strides = var_5082_strides_0, weight = layers_27_fc2_outlier_module_weight_to_fp16_sparsified, x = input_223_cast_fp16)[name = tensor("op_5082_cast_fp16")]; + tensor hidden_states_59_cast_fp16 = add(x = var_5076_cast_fp16, y = var_5082_cast_fp16)[name = tensor("hidden_states_59_cast_fp16")]; + tensor inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = hidden_states_59_cast_fp16)[name = tensor("inputs_113_cast_fp16")]; + tensor var_5092 = const()[name = tensor("op_5092"), val = tensor(3)]; + tensor out_113_axes_0 = const()[name = tensor("out_113_axes_0"), val = tensor([1])]; + tensor var_5111_to_fp16 = const()[name = tensor("op_5111_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_113_cast_fp16 = layer_norm(axes = out_113_axes_0, epsilon = var_5111_to_fp16, x = inputs_113_cast_fp16)[name = tensor("out_113_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371917312)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371919936)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_113_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor var_5133_pad_type_0 = const()[name = tensor("op_5133_pad_type_0"), val = tensor("valid")]; + tensor var_5133_strides_0 = const()[name = tensor("op_5133_strides_0"), val = tensor([1, 1])]; + tensor var_5133_pad_0 = const()[name = tensor("op_5133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5133_dilations_0 = const()[name = tensor("op_5133_dilations_0"), val = tensor([1, 1])]; + tensor var_5133_groups_0 = const()[name = tensor("op_5133_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371922560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372741824))), name = tensor("layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_28_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372741952)))]; + tensor var_5133_cast_fp16 = conv(bias = layers_28_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5133_dilations_0, groups = var_5133_groups_0, pad = var_5133_pad_0, pad_type = var_5133_pad_type_0, strides = var_5133_strides_0, weight = layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_5133_cast_fp16")]; + tensor var_5139_pad_type_0 = const()[name = tensor("op_5139_pad_type_0"), val = tensor("valid")]; + tensor var_5139_strides_0 = const()[name = tensor("op_5139_strides_0"), val = tensor([1, 1])]; + tensor var_5139_pad_0 = const()[name = tensor("op_5139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5139_dilations_0 = const()[name = tensor("op_5139_dilations_0"), val = tensor([1, 1])]; + tensor var_5139_groups_0 = const()[name = tensor("op_5139_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372772992))), name = tensor("layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372744576))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5139_cast_fp16 = conv(dilations = var_5139_dilations_0, groups = var_5139_groups_0, pad = var_5139_pad_0, pad_type = var_5139_pad_type_0, strides = var_5139_strides_0, weight = layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_5139_cast_fp16")]; + tensor query_57_cast_fp16 = add(x = var_5133_cast_fp16, y = var_5139_cast_fp16)[name = tensor("query_57_cast_fp16")]; + tensor var_5148_pad_type_0 = const()[name = tensor("op_5148_pad_type_0"), val = tensor("valid")]; + tensor var_5148_strides_0 = const()[name = tensor("op_5148_strides_0"), val = tensor([1, 1])]; + tensor var_5148_pad_0 = const()[name = tensor("op_5148_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5148_dilations_0 = const()[name = tensor("op_5148_dilations_0"), val = tensor([1, 1])]; + tensor var_5148_groups_0 = const()[name = tensor("op_5148_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372977856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373797120))), name = tensor("layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5148_cast_fp16 = conv(dilations = var_5148_dilations_0, groups = var_5148_groups_0, pad = var_5148_pad_0, pad_type = var_5148_pad_type_0, strides = var_5148_strides_0, weight = layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_5148_cast_fp16")]; + tensor var_5154_pad_type_0 = const()[name = tensor("op_5154_pad_type_0"), val = tensor("valid")]; + tensor var_5154_strides_0 = const()[name = tensor("op_5154_strides_0"), val = tensor([1, 1])]; + tensor var_5154_pad_0 = const()[name = tensor("op_5154_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5154_dilations_0 = const()[name = tensor("op_5154_dilations_0"), val = tensor([1, 1])]; + tensor var_5154_groups_0 = const()[name = tensor("op_5154_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373819328))), name = tensor("layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373797248))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5154_cast_fp16 = conv(dilations = var_5154_dilations_0, groups = var_5154_groups_0, pad = var_5154_pad_0, pad_type = var_5154_pad_type_0, strides = var_5154_strides_0, weight = layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_5154_cast_fp16")]; + tensor key_57_cast_fp16 = add(x = var_5148_cast_fp16, y = var_5154_cast_fp16)[name = tensor("key_57_cast_fp16")]; + tensor var_5164_pad_type_0 = const()[name = tensor("op_5164_pad_type_0"), val = tensor("valid")]; + tensor var_5164_strides_0 = const()[name = tensor("op_5164_strides_0"), val = tensor([1, 1])]; + tensor var_5164_pad_0 = const()[name = tensor("op_5164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5164_dilations_0 = const()[name = tensor("op_5164_dilations_0"), val = tensor([1, 1])]; + tensor var_5164_groups_0 = const()[name = tensor("op_5164_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374024192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374843456))), name = tensor("layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_28_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374843584)))]; + tensor var_5164_cast_fp16 = conv(bias = layers_28_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5164_dilations_0, groups = var_5164_groups_0, pad = var_5164_pad_0, pad_type = var_5164_pad_type_0, strides = var_5164_strides_0, weight = layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_5164_cast_fp16")]; + tensor var_5170_pad_type_0 = const()[name = tensor("op_5170_pad_type_0"), val = tensor("valid")]; + tensor var_5170_strides_0 = const()[name = tensor("op_5170_strides_0"), val = tensor([1, 1])]; + tensor var_5170_pad_0 = const()[name = tensor("op_5170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5170_dilations_0 = const()[name = tensor("op_5170_dilations_0"), val = tensor([1, 1])]; + tensor var_5170_groups_0 = const()[name = tensor("op_5170_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374858048))), name = tensor("layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374846208))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5170_cast_fp16 = conv(dilations = var_5170_dilations_0, groups = var_5170_groups_0, pad = var_5170_pad_0, pad_type = var_5170_pad_type_0, strides = var_5170_strides_0, weight = layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_5170_cast_fp16")]; + tensor value_57_cast_fp16 = add(x = var_5164_cast_fp16, y = var_5170_cast_fp16)[name = tensor("value_57_cast_fp16")]; + tensor var_5173 = const()[name = tensor("op_5173"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_57_cast_fp16 = reshape(shape = var_5173, x = query_57_cast_fp16)[name = tensor("mh_q_57_cast_fp16")]; + tensor var_5175_to_fp16 = const()[name = tensor("op_5175_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5176_cast_fp16 = mul(x = mh_q_57_cast_fp16, y = var_5175_to_fp16)[name = tensor("op_5176_cast_fp16")]; + tensor var_5177 = const()[name = tensor("op_5177"), val = tensor([1, 20, 64, -1])]; + tensor var_5178_cast_fp16 = reshape(shape = var_5177, x = key_57_cast_fp16)[name = tensor("op_5178_cast_fp16")]; + tensor mh_w_57_transpose_x_0 = const()[name = tensor("mh_w_57_transpose_x_0"), val = tensor(true)]; + tensor mh_w_57_transpose_y_0 = const()[name = tensor("mh_w_57_transpose_y_0"), val = tensor(false)]; + tensor mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_5176_cast_fp16, y = var_5178_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor var_5181_cast_fp16 = softmax(axis = var_5092, x = mh_w_57_cast_fp16)[name = tensor("op_5181_cast_fp16")]; + tensor var_5182 = const()[name = tensor("op_5182"), val = tensor([1, 20, 64, -1])]; + tensor var_5183_cast_fp16 = reshape(shape = var_5182, x = value_57_cast_fp16)[name = tensor("op_5183_cast_fp16")]; + tensor attn_57_transpose_x_0 = const()[name = tensor("attn_57_transpose_x_0"), val = tensor(false)]; + tensor attn_57_transpose_y_0 = const()[name = tensor("attn_57_transpose_y_0"), val = tensor(true)]; + tensor attn_57_cast_fp16 = matmul(transpose_x = attn_57_transpose_x_0, transpose_y = attn_57_transpose_y_0, x = var_5183_cast_fp16, y = var_5181_cast_fp16)[name = tensor("attn_57_cast_fp16")]; + tensor var_5186 = const()[name = tensor("op_5186"), val = tensor([1, 1280, 1, -1])]; + tensor input_225_cast_fp16 = reshape(shape = var_5186, x = attn_57_cast_fp16)[name = tensor("input_225_cast_fp16")]; + tensor var_5196_pad_type_0 = const()[name = tensor("op_5196_pad_type_0"), val = tensor("valid")]; + tensor var_5196_strides_0 = const()[name = tensor("op_5196_strides_0"), val = tensor([1, 1])]; + tensor var_5196_pad_0 = const()[name = tensor("op_5196_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5196_dilations_0 = const()[name = tensor("op_5196_dilations_0"), val = tensor([1, 1])]; + tensor var_5196_groups_0 = const()[name = tensor("op_5196_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375062912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375882176))), name = tensor("layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_28_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375882304)))]; + tensor var_5196_cast_fp16 = conv(bias = layers_28_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5196_dilations_0, groups = var_5196_groups_0, pad = var_5196_pad_0, pad_type = var_5196_pad_type_0, strides = var_5196_strides_0, weight = layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = tensor("op_5196_cast_fp16")]; + tensor var_5202_pad_type_0 = const()[name = tensor("op_5202_pad_type_0"), val = tensor("valid")]; + tensor var_5202_strides_0 = const()[name = tensor("op_5202_strides_0"), val = tensor([1, 1])]; + tensor var_5202_pad_0 = const()[name = tensor("op_5202_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5202_dilations_0 = const()[name = tensor("op_5202_dilations_0"), val = tensor([1, 1])]; + tensor var_5202_groups_0 = const()[name = tensor("op_5202_groups_0"), val = tensor(1)]; + tensor layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375896192))), name = tensor("layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375884928))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5202_cast_fp16 = conv(dilations = var_5202_dilations_0, groups = var_5202_groups_0, pad = var_5202_pad_0, pad_type = var_5202_pad_type_0, strides = var_5202_strides_0, weight = layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_225_cast_fp16)[name = tensor("op_5202_cast_fp16")]; + tensor obj_115_cast_fp16 = add(x = var_5196_cast_fp16, y = var_5202_cast_fp16)[name = tensor("obj_115_cast_fp16")]; + tensor inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = obj_115_cast_fp16)[name = tensor("inputs_115_cast_fp16")]; + tensor out_115_axes_0 = const()[name = tensor("out_115_axes_0"), val = tensor([1])]; + tensor var_5213_to_fp16 = const()[name = tensor("op_5213_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_5213_to_fp16, x = inputs_115_cast_fp16)[name = tensor("out_115_cast_fp16")]; + tensor input_227_gamma_0_to_fp16 = const()[name = tensor("input_227_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376101056)))]; + tensor input_227_beta_0_to_fp16 = const()[name = tensor("input_227_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376103680)))]; + tensor input_227_epsilon_0_to_fp16 = const()[name = tensor("input_227_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_227_cast_fp16 = batch_norm(beta = input_227_beta_0_to_fp16, epsilon = input_227_epsilon_0_to_fp16, gamma = input_227_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_115_cast_fp16)[name = tensor("input_227_cast_fp16")]; + tensor var_5231_pad_type_0 = const()[name = tensor("op_5231_pad_type_0"), val = tensor("valid")]; + tensor var_5231_strides_0 = const()[name = tensor("op_5231_strides_0"), val = tensor([1, 1])]; + tensor var_5231_pad_0 = const()[name = tensor("op_5231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5231_dilations_0 = const()[name = tensor("op_5231_dilations_0"), val = tensor([1, 1])]; + tensor var_5231_groups_0 = const()[name = tensor("op_5231_groups_0"), val = tensor(1)]; + tensor layers_28_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376106304))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379383168))), name = tensor("layers_28_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_28_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379383296)))]; + tensor var_5231_cast_fp16 = conv(bias = layers_28_fc1_inlier_module_bias_to_fp16, dilations = var_5231_dilations_0, groups = var_5231_groups_0, pad = var_5231_pad_0, pad_type = var_5231_pad_type_0, strides = var_5231_strides_0, weight = layers_28_fc1_inlier_module_weight_to_fp16_palettized, x = input_227_cast_fp16)[name = tensor("op_5231_cast_fp16")]; + tensor var_5237_pad_type_0 = const()[name = tensor("op_5237_pad_type_0"), val = tensor("valid")]; + tensor var_5237_strides_0 = const()[name = tensor("op_5237_strides_0"), val = tensor([1, 1])]; + tensor var_5237_pad_0 = const()[name = tensor("op_5237_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5237_dilations_0 = const()[name = tensor("op_5237_dilations_0"), val = tensor([1, 1])]; + tensor var_5237_groups_0 = const()[name = tensor("op_5237_groups_0"), val = tensor(1)]; + tensor layers_28_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379450816))), name = tensor("layers_28_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379393600))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_5237_cast_fp16 = conv(dilations = var_5237_dilations_0, groups = var_5237_groups_0, pad = var_5237_pad_0, pad_type = var_5237_pad_type_0, strides = var_5237_strides_0, weight = layers_28_fc1_outlier_module_weight_to_fp16_sparsified, x = input_227_cast_fp16)[name = tensor("op_5237_cast_fp16")]; + tensor input_229_cast_fp16 = add(x = var_5231_cast_fp16, y = var_5237_cast_fp16)[name = tensor("input_229_cast_fp16")]; + tensor input_231_mode_0 = const()[name = tensor("input_231_mode_0"), val = tensor("EXACT")]; + tensor input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor("input_231_cast_fp16")]; + tensor var_5248_pad_type_0 = const()[name = tensor("op_5248_pad_type_0"), val = tensor("valid")]; + tensor var_5248_strides_0 = const()[name = tensor("op_5248_strides_0"), val = tensor([1, 1])]; + tensor var_5248_pad_0 = const()[name = tensor("op_5248_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5248_dilations_0 = const()[name = tensor("op_5248_dilations_0"), val = tensor([1, 1])]; + tensor var_5248_groups_0 = const()[name = tensor("op_5248_groups_0"), val = tensor(1)]; + tensor layers_28_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380270080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383546944))), name = tensor("layers_28_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_28_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383547072)))]; + tensor var_5248_cast_fp16 = conv(bias = layers_28_fc2_inlier_module_bias_to_fp16, dilations = var_5248_dilations_0, groups = var_5248_groups_0, pad = var_5248_pad_0, pad_type = var_5248_pad_type_0, strides = var_5248_strides_0, weight = layers_28_fc2_inlier_module_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = tensor("op_5248_cast_fp16")]; + tensor var_5254_pad_type_0 = const()[name = tensor("op_5254_pad_type_0"), val = tensor("valid")]; + tensor var_5254_strides_0 = const()[name = tensor("op_5254_strides_0"), val = tensor([1, 1])]; + tensor var_5254_pad_0 = const()[name = tensor("op_5254_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5254_dilations_0 = const()[name = tensor("op_5254_dilations_0"), val = tensor([1, 1])]; + tensor var_5254_groups_0 = const()[name = tensor("op_5254_groups_0"), val = tensor(1)]; + tensor layers_28_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383608320))), name = tensor("layers_28_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383549696))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_5254_cast_fp16 = conv(dilations = var_5254_dilations_0, groups = var_5254_groups_0, pad = var_5254_pad_0, pad_type = var_5254_pad_type_0, strides = var_5254_strides_0, weight = layers_28_fc2_outlier_module_weight_to_fp16_sparsified, x = input_231_cast_fp16)[name = tensor("op_5254_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = var_5248_cast_fp16, y = var_5254_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; + tensor inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = hidden_states_61_cast_fp16)[name = tensor("inputs_117_cast_fp16")]; + tensor var_5264 = const()[name = tensor("op_5264"), val = tensor(3)]; + tensor out_117_axes_0 = const()[name = tensor("out_117_axes_0"), val = tensor([1])]; + tensor var_5283_to_fp16 = const()[name = tensor("op_5283_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_117_cast_fp16 = layer_norm(axes = out_117_axes_0, epsilon = var_5283_to_fp16, x = inputs_117_cast_fp16)[name = tensor("out_117_cast_fp16")]; + tensor obj_117_gamma_0_to_fp16 = const()[name = tensor("obj_117_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384427584)))]; + tensor obj_117_beta_0_to_fp16 = const()[name = tensor("obj_117_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384430208)))]; + tensor obj_117_epsilon_0_to_fp16 = const()[name = tensor("obj_117_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_117_cast_fp16 = batch_norm(beta = obj_117_beta_0_to_fp16, epsilon = obj_117_epsilon_0_to_fp16, gamma = obj_117_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_117_cast_fp16)[name = tensor("obj_117_cast_fp16")]; + tensor var_5305_pad_type_0 = const()[name = tensor("op_5305_pad_type_0"), val = tensor("valid")]; + tensor var_5305_strides_0 = const()[name = tensor("op_5305_strides_0"), val = tensor([1, 1])]; + tensor var_5305_pad_0 = const()[name = tensor("op_5305_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5305_dilations_0 = const()[name = tensor("op_5305_dilations_0"), val = tensor([1, 1])]; + tensor var_5305_groups_0 = const()[name = tensor("op_5305_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384432832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385252096))), name = tensor("layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_29_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385252224)))]; + tensor var_5305_cast_fp16 = conv(bias = layers_29_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5305_dilations_0, groups = var_5305_groups_0, pad = var_5305_pad_0, pad_type = var_5305_pad_type_0, strides = var_5305_strides_0, weight = layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_5305_cast_fp16")]; + tensor var_5311_pad_type_0 = const()[name = tensor("op_5311_pad_type_0"), val = tensor("valid")]; + tensor var_5311_strides_0 = const()[name = tensor("op_5311_strides_0"), val = tensor([1, 1])]; + tensor var_5311_pad_0 = const()[name = tensor("op_5311_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5311_dilations_0 = const()[name = tensor("op_5311_dilations_0"), val = tensor([1, 1])]; + tensor var_5311_groups_0 = const()[name = tensor("op_5311_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385277312))), name = tensor("layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385254848))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5311_cast_fp16 = conv(dilations = var_5311_dilations_0, groups = var_5311_groups_0, pad = var_5311_pad_0, pad_type = var_5311_pad_type_0, strides = var_5311_strides_0, weight = layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_5311_cast_fp16")]; + tensor query_59_cast_fp16 = add(x = var_5305_cast_fp16, y = var_5311_cast_fp16)[name = tensor("query_59_cast_fp16")]; + tensor var_5320_pad_type_0 = const()[name = tensor("op_5320_pad_type_0"), val = tensor("valid")]; + tensor var_5320_strides_0 = const()[name = tensor("op_5320_strides_0"), val = tensor([1, 1])]; + tensor var_5320_pad_0 = const()[name = tensor("op_5320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5320_dilations_0 = const()[name = tensor("op_5320_dilations_0"), val = tensor([1, 1])]; + tensor var_5320_groups_0 = const()[name = tensor("op_5320_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385482176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386301440))), name = tensor("layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5320_cast_fp16 = conv(dilations = var_5320_dilations_0, groups = var_5320_groups_0, pad = var_5320_pad_0, pad_type = var_5320_pad_type_0, strides = var_5320_strides_0, weight = layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_5320_cast_fp16")]; + tensor var_5326_pad_type_0 = const()[name = tensor("op_5326_pad_type_0"), val = tensor("valid")]; + tensor var_5326_strides_0 = const()[name = tensor("op_5326_strides_0"), val = tensor([1, 1])]; + tensor var_5326_pad_0 = const()[name = tensor("op_5326_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5326_dilations_0 = const()[name = tensor("op_5326_dilations_0"), val = tensor([1, 1])]; + tensor var_5326_groups_0 = const()[name = tensor("op_5326_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386323840))), name = tensor("layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386301568))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5326_cast_fp16 = conv(dilations = var_5326_dilations_0, groups = var_5326_groups_0, pad = var_5326_pad_0, pad_type = var_5326_pad_type_0, strides = var_5326_strides_0, weight = layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_5326_cast_fp16")]; + tensor key_59_cast_fp16 = add(x = var_5320_cast_fp16, y = var_5326_cast_fp16)[name = tensor("key_59_cast_fp16")]; + tensor var_5336_pad_type_0 = const()[name = tensor("op_5336_pad_type_0"), val = tensor("valid")]; + tensor var_5336_strides_0 = const()[name = tensor("op_5336_strides_0"), val = tensor([1, 1])]; + tensor var_5336_pad_0 = const()[name = tensor("op_5336_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5336_dilations_0 = const()[name = tensor("op_5336_dilations_0"), val = tensor([1, 1])]; + tensor var_5336_groups_0 = const()[name = tensor("op_5336_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386528704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387347968))), name = tensor("layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_29_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387348096)))]; + tensor var_5336_cast_fp16 = conv(bias = layers_29_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5336_dilations_0, groups = var_5336_groups_0, pad = var_5336_pad_0, pad_type = var_5336_pad_type_0, strides = var_5336_strides_0, weight = layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_5336_cast_fp16")]; + tensor var_5342_pad_type_0 = const()[name = tensor("op_5342_pad_type_0"), val = tensor("valid")]; + tensor var_5342_strides_0 = const()[name = tensor("op_5342_strides_0"), val = tensor([1, 1])]; + tensor var_5342_pad_0 = const()[name = tensor("op_5342_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5342_dilations_0 = const()[name = tensor("op_5342_dilations_0"), val = tensor([1, 1])]; + tensor var_5342_groups_0 = const()[name = tensor("op_5342_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387363392))), name = tensor("layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387350720))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5342_cast_fp16 = conv(dilations = var_5342_dilations_0, groups = var_5342_groups_0, pad = var_5342_pad_0, pad_type = var_5342_pad_type_0, strides = var_5342_strides_0, weight = layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_5342_cast_fp16")]; + tensor value_59_cast_fp16 = add(x = var_5336_cast_fp16, y = var_5342_cast_fp16)[name = tensor("value_59_cast_fp16")]; + tensor var_5345 = const()[name = tensor("op_5345"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_59_cast_fp16 = reshape(shape = var_5345, x = query_59_cast_fp16)[name = tensor("mh_q_59_cast_fp16")]; + tensor var_5347_to_fp16 = const()[name = tensor("op_5347_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5348_cast_fp16 = mul(x = mh_q_59_cast_fp16, y = var_5347_to_fp16)[name = tensor("op_5348_cast_fp16")]; + tensor var_5349 = const()[name = tensor("op_5349"), val = tensor([1, 20, 64, -1])]; + tensor var_5350_cast_fp16 = reshape(shape = var_5349, x = key_59_cast_fp16)[name = tensor("op_5350_cast_fp16")]; + tensor mh_w_59_transpose_x_0 = const()[name = tensor("mh_w_59_transpose_x_0"), val = tensor(true)]; + tensor mh_w_59_transpose_y_0 = const()[name = tensor("mh_w_59_transpose_y_0"), val = tensor(false)]; + tensor mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_5348_cast_fp16, y = var_5350_cast_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor var_5353_cast_fp16 = softmax(axis = var_5264, x = mh_w_59_cast_fp16)[name = tensor("op_5353_cast_fp16")]; + tensor var_5354 = const()[name = tensor("op_5354"), val = tensor([1, 20, 64, -1])]; + tensor var_5355_cast_fp16 = reshape(shape = var_5354, x = value_59_cast_fp16)[name = tensor("op_5355_cast_fp16")]; + tensor attn_59_transpose_x_0 = const()[name = tensor("attn_59_transpose_x_0"), val = tensor(false)]; + tensor attn_59_transpose_y_0 = const()[name = tensor("attn_59_transpose_y_0"), val = tensor(true)]; + tensor attn_59_cast_fp16 = matmul(transpose_x = attn_59_transpose_x_0, transpose_y = attn_59_transpose_y_0, x = var_5355_cast_fp16, y = var_5353_cast_fp16)[name = tensor("attn_59_cast_fp16")]; + tensor var_5358 = const()[name = tensor("op_5358"), val = tensor([1, 1280, 1, -1])]; + tensor input_233_cast_fp16 = reshape(shape = var_5358, x = attn_59_cast_fp16)[name = tensor("input_233_cast_fp16")]; + tensor var_5368_pad_type_0 = const()[name = tensor("op_5368_pad_type_0"), val = tensor("valid")]; + tensor var_5368_strides_0 = const()[name = tensor("op_5368_strides_0"), val = tensor([1, 1])]; + tensor var_5368_pad_0 = const()[name = tensor("op_5368_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5368_dilations_0 = const()[name = tensor("op_5368_dilations_0"), val = tensor([1, 1])]; + tensor var_5368_groups_0 = const()[name = tensor("op_5368_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387568256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388387520))), name = tensor("layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_29_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388387648)))]; + tensor var_5368_cast_fp16 = conv(bias = layers_29_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5368_dilations_0, groups = var_5368_groups_0, pad = var_5368_pad_0, pad_type = var_5368_pad_type_0, strides = var_5368_strides_0, weight = layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = tensor("op_5368_cast_fp16")]; + tensor var_5374_pad_type_0 = const()[name = tensor("op_5374_pad_type_0"), val = tensor("valid")]; + tensor var_5374_strides_0 = const()[name = tensor("op_5374_strides_0"), val = tensor([1, 1])]; + tensor var_5374_pad_0 = const()[name = tensor("op_5374_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5374_dilations_0 = const()[name = tensor("op_5374_dilations_0"), val = tensor([1, 1])]; + tensor var_5374_groups_0 = const()[name = tensor("op_5374_groups_0"), val = tensor(1)]; + tensor layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388402304))), name = tensor("layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388390272))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5374_cast_fp16 = conv(dilations = var_5374_dilations_0, groups = var_5374_groups_0, pad = var_5374_pad_0, pad_type = var_5374_pad_type_0, strides = var_5374_strides_0, weight = layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_233_cast_fp16)[name = tensor("op_5374_cast_fp16")]; + tensor obj_119_cast_fp16 = add(x = var_5368_cast_fp16, y = var_5374_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_119_cast_fp16")]; + tensor out_119_axes_0 = const()[name = tensor("out_119_axes_0"), val = tensor([1])]; + tensor var_5385_to_fp16 = const()[name = tensor("op_5385_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_119_cast_fp16 = layer_norm(axes = out_119_axes_0, epsilon = var_5385_to_fp16, x = inputs_119_cast_fp16)[name = tensor("out_119_cast_fp16")]; + tensor input_235_gamma_0_to_fp16 = const()[name = tensor("input_235_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388607168)))]; + tensor input_235_beta_0_to_fp16 = const()[name = tensor("input_235_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388609792)))]; + tensor input_235_epsilon_0_to_fp16 = const()[name = tensor("input_235_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_235_cast_fp16 = batch_norm(beta = input_235_beta_0_to_fp16, epsilon = input_235_epsilon_0_to_fp16, gamma = input_235_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_119_cast_fp16)[name = tensor("input_235_cast_fp16")]; + tensor var_5403_pad_type_0 = const()[name = tensor("op_5403_pad_type_0"), val = tensor("valid")]; + tensor var_5403_strides_0 = const()[name = tensor("op_5403_strides_0"), val = tensor([1, 1])]; + tensor var_5403_pad_0 = const()[name = tensor("op_5403_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5403_dilations_0 = const()[name = tensor("op_5403_dilations_0"), val = tensor([1, 1])]; + tensor var_5403_groups_0 = const()[name = tensor("op_5403_groups_0"), val = tensor(1)]; + tensor layers_29_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388612416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391889280))), name = tensor("layers_29_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_29_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391889408)))]; + tensor var_5403_cast_fp16 = conv(bias = layers_29_fc1_inlier_module_bias_to_fp16, dilations = var_5403_dilations_0, groups = var_5403_groups_0, pad = var_5403_pad_0, pad_type = var_5403_pad_type_0, strides = var_5403_strides_0, weight = layers_29_fc1_inlier_module_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = tensor("op_5403_cast_fp16")]; + tensor var_5409_pad_type_0 = const()[name = tensor("op_5409_pad_type_0"), val = tensor("valid")]; + tensor var_5409_strides_0 = const()[name = tensor("op_5409_strides_0"), val = tensor([1, 1])]; + tensor var_5409_pad_0 = const()[name = tensor("op_5409_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5409_dilations_0 = const()[name = tensor("op_5409_dilations_0"), val = tensor([1, 1])]; + tensor var_5409_groups_0 = const()[name = tensor("op_5409_groups_0"), val = tensor(1)]; + tensor layers_29_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391957888))), name = tensor("layers_29_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391899712))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_5409_cast_fp16 = conv(dilations = var_5409_dilations_0, groups = var_5409_groups_0, pad = var_5409_pad_0, pad_type = var_5409_pad_type_0, strides = var_5409_strides_0, weight = layers_29_fc1_outlier_module_weight_to_fp16_sparsified, x = input_235_cast_fp16)[name = tensor("op_5409_cast_fp16")]; + tensor input_237_cast_fp16 = add(x = var_5403_cast_fp16, y = var_5409_cast_fp16)[name = tensor("input_237_cast_fp16")]; + tensor input_239_mode_0 = const()[name = tensor("input_239_mode_0"), val = tensor("EXACT")]; + tensor input_239_cast_fp16 = gelu(mode = input_239_mode_0, x = input_237_cast_fp16)[name = tensor("input_239_cast_fp16")]; + tensor var_5420_pad_type_0 = const()[name = tensor("op_5420_pad_type_0"), val = tensor("valid")]; + tensor var_5420_strides_0 = const()[name = tensor("op_5420_strides_0"), val = tensor([1, 1])]; + tensor var_5420_pad_0 = const()[name = tensor("op_5420_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5420_dilations_0 = const()[name = tensor("op_5420_dilations_0"), val = tensor([1, 1])]; + tensor var_5420_groups_0 = const()[name = tensor("op_5420_groups_0"), val = tensor(1)]; + tensor layers_29_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392777152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396054016))), name = tensor("layers_29_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_29_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396054144)))]; + tensor var_5420_cast_fp16 = conv(bias = layers_29_fc2_inlier_module_bias_to_fp16, dilations = var_5420_dilations_0, groups = var_5420_groups_0, pad = var_5420_pad_0, pad_type = var_5420_pad_type_0, strides = var_5420_strides_0, weight = layers_29_fc2_inlier_module_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = tensor("op_5420_cast_fp16")]; + tensor var_5426_pad_type_0 = const()[name = tensor("op_5426_pad_type_0"), val = tensor("valid")]; + tensor var_5426_strides_0 = const()[name = tensor("op_5426_strides_0"), val = tensor([1, 1])]; + tensor var_5426_pad_0 = const()[name = tensor("op_5426_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5426_dilations_0 = const()[name = tensor("op_5426_dilations_0"), val = tensor([1, 1])]; + tensor var_5426_groups_0 = const()[name = tensor("op_5426_groups_0"), val = tensor(1)]; + tensor layers_29_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396118272))), name = tensor("layers_29_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396056768))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_5426_cast_fp16 = conv(dilations = var_5426_dilations_0, groups = var_5426_groups_0, pad = var_5426_pad_0, pad_type = var_5426_pad_type_0, strides = var_5426_strides_0, weight = layers_29_fc2_outlier_module_weight_to_fp16_sparsified, x = input_239_cast_fp16)[name = tensor("op_5426_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = add(x = var_5420_cast_fp16, y = var_5426_cast_fp16)[name = tensor("hidden_states_63_cast_fp16")]; + tensor inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = hidden_states_63_cast_fp16)[name = tensor("inputs_121_cast_fp16")]; + tensor var_5436 = const()[name = tensor("op_5436"), val = tensor(3)]; + tensor out_121_axes_0 = const()[name = tensor("out_121_axes_0"), val = tensor([1])]; + tensor var_5455_to_fp16 = const()[name = tensor("op_5455_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_5455_to_fp16, x = inputs_121_cast_fp16)[name = tensor("out_121_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396937536)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396940160)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_121_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor var_5477_pad_type_0 = const()[name = tensor("op_5477_pad_type_0"), val = tensor("valid")]; + tensor var_5477_strides_0 = const()[name = tensor("op_5477_strides_0"), val = tensor([1, 1])]; + tensor var_5477_pad_0 = const()[name = tensor("op_5477_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5477_dilations_0 = const()[name = tensor("op_5477_dilations_0"), val = tensor([1, 1])]; + tensor var_5477_groups_0 = const()[name = tensor("op_5477_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396942784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397762048))), name = tensor("layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_30_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397762176)))]; + tensor var_5477_cast_fp16 = conv(bias = layers_30_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5477_dilations_0, groups = var_5477_groups_0, pad = var_5477_pad_0, pad_type = var_5477_pad_type_0, strides = var_5477_strides_0, weight = layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_5477_cast_fp16")]; + tensor var_5483_pad_type_0 = const()[name = tensor("op_5483_pad_type_0"), val = tensor("valid")]; + tensor var_5483_strides_0 = const()[name = tensor("op_5483_strides_0"), val = tensor([1, 1])]; + tensor var_5483_pad_0 = const()[name = tensor("op_5483_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5483_dilations_0 = const()[name = tensor("op_5483_dilations_0"), val = tensor([1, 1])]; + tensor var_5483_groups_0 = const()[name = tensor("op_5483_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397784960))), name = tensor("layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397764800))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5483_cast_fp16 = conv(dilations = var_5483_dilations_0, groups = var_5483_groups_0, pad = var_5483_pad_0, pad_type = var_5483_pad_type_0, strides = var_5483_strides_0, weight = layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_5483_cast_fp16")]; + tensor query_61_cast_fp16 = add(x = var_5477_cast_fp16, y = var_5483_cast_fp16)[name = tensor("query_61_cast_fp16")]; + tensor var_5492_pad_type_0 = const()[name = tensor("op_5492_pad_type_0"), val = tensor("valid")]; + tensor var_5492_strides_0 = const()[name = tensor("op_5492_strides_0"), val = tensor([1, 1])]; + tensor var_5492_pad_0 = const()[name = tensor("op_5492_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5492_dilations_0 = const()[name = tensor("op_5492_dilations_0"), val = tensor([1, 1])]; + tensor var_5492_groups_0 = const()[name = tensor("op_5492_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397989824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398809088))), name = tensor("layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5492_cast_fp16 = conv(dilations = var_5492_dilations_0, groups = var_5492_groups_0, pad = var_5492_pad_0, pad_type = var_5492_pad_type_0, strides = var_5492_strides_0, weight = layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_5492_cast_fp16")]; + tensor var_5498_pad_type_0 = const()[name = tensor("op_5498_pad_type_0"), val = tensor("valid")]; + tensor var_5498_strides_0 = const()[name = tensor("op_5498_strides_0"), val = tensor([1, 1])]; + tensor var_5498_pad_0 = const()[name = tensor("op_5498_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5498_dilations_0 = const()[name = tensor("op_5498_dilations_0"), val = tensor([1, 1])]; + tensor var_5498_groups_0 = const()[name = tensor("op_5498_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398830400))), name = tensor("layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398809216))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5498_cast_fp16 = conv(dilations = var_5498_dilations_0, groups = var_5498_groups_0, pad = var_5498_pad_0, pad_type = var_5498_pad_type_0, strides = var_5498_strides_0, weight = layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_5498_cast_fp16")]; + tensor key_61_cast_fp16 = add(x = var_5492_cast_fp16, y = var_5498_cast_fp16)[name = tensor("key_61_cast_fp16")]; + tensor var_5508_pad_type_0 = const()[name = tensor("op_5508_pad_type_0"), val = tensor("valid")]; + tensor var_5508_strides_0 = const()[name = tensor("op_5508_strides_0"), val = tensor([1, 1])]; + tensor var_5508_pad_0 = const()[name = tensor("op_5508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5508_dilations_0 = const()[name = tensor("op_5508_dilations_0"), val = tensor([1, 1])]; + tensor var_5508_groups_0 = const()[name = tensor("op_5508_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399035264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399854528))), name = tensor("layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_30_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399854656)))]; + tensor var_5508_cast_fp16 = conv(bias = layers_30_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5508_dilations_0, groups = var_5508_groups_0, pad = var_5508_pad_0, pad_type = var_5508_pad_type_0, strides = var_5508_strides_0, weight = layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_5508_cast_fp16")]; + tensor var_5514_pad_type_0 = const()[name = tensor("op_5514_pad_type_0"), val = tensor("valid")]; + tensor var_5514_strides_0 = const()[name = tensor("op_5514_strides_0"), val = tensor([1, 1])]; + tensor var_5514_pad_0 = const()[name = tensor("op_5514_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5514_dilations_0 = const()[name = tensor("op_5514_dilations_0"), val = tensor([1, 1])]; + tensor var_5514_groups_0 = const()[name = tensor("op_5514_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399869696))), name = tensor("layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399857280))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5514_cast_fp16 = conv(dilations = var_5514_dilations_0, groups = var_5514_groups_0, pad = var_5514_pad_0, pad_type = var_5514_pad_type_0, strides = var_5514_strides_0, weight = layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_5514_cast_fp16")]; + tensor value_61_cast_fp16 = add(x = var_5508_cast_fp16, y = var_5514_cast_fp16)[name = tensor("value_61_cast_fp16")]; + tensor var_5517 = const()[name = tensor("op_5517"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_61_cast_fp16 = reshape(shape = var_5517, x = query_61_cast_fp16)[name = tensor("mh_q_61_cast_fp16")]; + tensor var_5519_to_fp16 = const()[name = tensor("op_5519_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5520_cast_fp16 = mul(x = mh_q_61_cast_fp16, y = var_5519_to_fp16)[name = tensor("op_5520_cast_fp16")]; + tensor var_5521 = const()[name = tensor("op_5521"), val = tensor([1, 20, 64, -1])]; + tensor var_5522_cast_fp16 = reshape(shape = var_5521, x = key_61_cast_fp16)[name = tensor("op_5522_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_5520_cast_fp16, y = var_5522_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor var_5525_cast_fp16 = softmax(axis = var_5436, x = mh_w_61_cast_fp16)[name = tensor("op_5525_cast_fp16")]; + tensor var_5526 = const()[name = tensor("op_5526"), val = tensor([1, 20, 64, -1])]; + tensor var_5527_cast_fp16 = reshape(shape = var_5526, x = value_61_cast_fp16)[name = tensor("op_5527_cast_fp16")]; + tensor attn_61_transpose_x_0 = const()[name = tensor("attn_61_transpose_x_0"), val = tensor(false)]; + tensor attn_61_transpose_y_0 = const()[name = tensor("attn_61_transpose_y_0"), val = tensor(true)]; + tensor attn_61_cast_fp16 = matmul(transpose_x = attn_61_transpose_x_0, transpose_y = attn_61_transpose_y_0, x = var_5527_cast_fp16, y = var_5525_cast_fp16)[name = tensor("attn_61_cast_fp16")]; + tensor var_5530 = const()[name = tensor("op_5530"), val = tensor([1, 1280, 1, -1])]; + tensor input_241_cast_fp16 = reshape(shape = var_5530, x = attn_61_cast_fp16)[name = tensor("input_241_cast_fp16")]; + tensor var_5540_pad_type_0 = const()[name = tensor("op_5540_pad_type_0"), val = tensor("valid")]; + tensor var_5540_strides_0 = const()[name = tensor("op_5540_strides_0"), val = tensor([1, 1])]; + tensor var_5540_pad_0 = const()[name = tensor("op_5540_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5540_dilations_0 = const()[name = tensor("op_5540_dilations_0"), val = tensor([1, 1])]; + tensor var_5540_groups_0 = const()[name = tensor("op_5540_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400074560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400893824))), name = tensor("layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_30_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400893952)))]; + tensor var_5540_cast_fp16 = conv(bias = layers_30_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5540_dilations_0, groups = var_5540_groups_0, pad = var_5540_pad_0, pad_type = var_5540_pad_type_0, strides = var_5540_strides_0, weight = layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = tensor("op_5540_cast_fp16")]; + tensor var_5546_pad_type_0 = const()[name = tensor("op_5546_pad_type_0"), val = tensor("valid")]; + tensor var_5546_strides_0 = const()[name = tensor("op_5546_strides_0"), val = tensor([1, 1])]; + tensor var_5546_pad_0 = const()[name = tensor("op_5546_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5546_dilations_0 = const()[name = tensor("op_5546_dilations_0"), val = tensor([1, 1])]; + tensor var_5546_groups_0 = const()[name = tensor("op_5546_groups_0"), val = tensor(1)]; + tensor layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400908032))), name = tensor("layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400896576))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5546_cast_fp16 = conv(dilations = var_5546_dilations_0, groups = var_5546_groups_0, pad = var_5546_pad_0, pad_type = var_5546_pad_type_0, strides = var_5546_strides_0, weight = layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_241_cast_fp16)[name = tensor("op_5546_cast_fp16")]; + tensor obj_123_cast_fp16 = add(x = var_5540_cast_fp16, y = var_5546_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_123_cast_fp16")]; + tensor out_123_axes_0 = const()[name = tensor("out_123_axes_0"), val = tensor([1])]; + tensor var_5557_to_fp16 = const()[name = tensor("op_5557_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_123_cast_fp16 = layer_norm(axes = out_123_axes_0, epsilon = var_5557_to_fp16, x = inputs_123_cast_fp16)[name = tensor("out_123_cast_fp16")]; + tensor input_243_gamma_0_to_fp16 = const()[name = tensor("input_243_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401112896)))]; + tensor input_243_beta_0_to_fp16 = const()[name = tensor("input_243_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401115520)))]; + tensor input_243_epsilon_0_to_fp16 = const()[name = tensor("input_243_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_243_cast_fp16 = batch_norm(beta = input_243_beta_0_to_fp16, epsilon = input_243_epsilon_0_to_fp16, gamma = input_243_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_123_cast_fp16)[name = tensor("input_243_cast_fp16")]; + tensor var_5575_pad_type_0 = const()[name = tensor("op_5575_pad_type_0"), val = tensor("valid")]; + tensor var_5575_strides_0 = const()[name = tensor("op_5575_strides_0"), val = tensor([1, 1])]; + tensor var_5575_pad_0 = const()[name = tensor("op_5575_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5575_dilations_0 = const()[name = tensor("op_5575_dilations_0"), val = tensor([1, 1])]; + tensor var_5575_groups_0 = const()[name = tensor("op_5575_groups_0"), val = tensor(1)]; + tensor layers_30_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401118144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404395008))), name = tensor("layers_30_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_30_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404395136)))]; + tensor var_5575_cast_fp16 = conv(bias = layers_30_fc1_inlier_module_bias_to_fp16, dilations = var_5575_dilations_0, groups = var_5575_groups_0, pad = var_5575_pad_0, pad_type = var_5575_pad_type_0, strides = var_5575_strides_0, weight = layers_30_fc1_inlier_module_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = tensor("op_5575_cast_fp16")]; + tensor var_5581_pad_type_0 = const()[name = tensor("op_5581_pad_type_0"), val = tensor("valid")]; + tensor var_5581_strides_0 = const()[name = tensor("op_5581_strides_0"), val = tensor([1, 1])]; + tensor var_5581_pad_0 = const()[name = tensor("op_5581_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5581_dilations_0 = const()[name = tensor("op_5581_dilations_0"), val = tensor([1, 1])]; + tensor var_5581_groups_0 = const()[name = tensor("op_5581_groups_0"), val = tensor(1)]; + tensor layers_30_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404464960))), name = tensor("layers_30_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404405440))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_5581_cast_fp16 = conv(dilations = var_5581_dilations_0, groups = var_5581_groups_0, pad = var_5581_pad_0, pad_type = var_5581_pad_type_0, strides = var_5581_strides_0, weight = layers_30_fc1_outlier_module_weight_to_fp16_sparsified, x = input_243_cast_fp16)[name = tensor("op_5581_cast_fp16")]; + tensor input_245_cast_fp16 = add(x = var_5575_cast_fp16, y = var_5581_cast_fp16)[name = tensor("input_245_cast_fp16")]; + tensor input_247_mode_0 = const()[name = tensor("input_247_mode_0"), val = tensor("EXACT")]; + tensor input_247_cast_fp16 = gelu(mode = input_247_mode_0, x = input_245_cast_fp16)[name = tensor("input_247_cast_fp16")]; + tensor var_5592_pad_type_0 = const()[name = tensor("op_5592_pad_type_0"), val = tensor("valid")]; + tensor var_5592_strides_0 = const()[name = tensor("op_5592_strides_0"), val = tensor([1, 1])]; + tensor var_5592_pad_0 = const()[name = tensor("op_5592_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5592_dilations_0 = const()[name = tensor("op_5592_dilations_0"), val = tensor([1, 1])]; + tensor var_5592_groups_0 = const()[name = tensor("op_5592_groups_0"), val = tensor(1)]; + tensor layers_30_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405284224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408561088))), name = tensor("layers_30_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_30_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408561216)))]; + tensor var_5592_cast_fp16 = conv(bias = layers_30_fc2_inlier_module_bias_to_fp16, dilations = var_5592_dilations_0, groups = var_5592_groups_0, pad = var_5592_pad_0, pad_type = var_5592_pad_type_0, strides = var_5592_strides_0, weight = layers_30_fc2_inlier_module_weight_to_fp16_palettized, x = input_247_cast_fp16)[name = tensor("op_5592_cast_fp16")]; + tensor var_5598_pad_type_0 = const()[name = tensor("op_5598_pad_type_0"), val = tensor("valid")]; + tensor var_5598_strides_0 = const()[name = tensor("op_5598_strides_0"), val = tensor([1, 1])]; + tensor var_5598_pad_0 = const()[name = tensor("op_5598_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5598_dilations_0 = const()[name = tensor("op_5598_dilations_0"), val = tensor([1, 1])]; + tensor var_5598_groups_0 = const()[name = tensor("op_5598_groups_0"), val = tensor(1)]; + tensor layers_30_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408624832))), name = tensor("layers_30_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408563840))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_5598_cast_fp16 = conv(dilations = var_5598_dilations_0, groups = var_5598_groups_0, pad = var_5598_pad_0, pad_type = var_5598_pad_type_0, strides = var_5598_strides_0, weight = layers_30_fc2_outlier_module_weight_to_fp16_sparsified, x = input_247_cast_fp16)[name = tensor("op_5598_cast_fp16")]; + tensor hidden_states_65_cast_fp16 = add(x = var_5592_cast_fp16, y = var_5598_cast_fp16)[name = tensor("hidden_states_65_cast_fp16")]; + tensor inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = hidden_states_65_cast_fp16)[name = tensor("inputs_125_cast_fp16")]; + tensor var_5608 = const()[name = tensor("op_5608"), val = tensor(3)]; + tensor out_125_axes_0 = const()[name = tensor("out_125_axes_0"), val = tensor([1])]; + tensor var_5627_to_fp16 = const()[name = tensor("op_5627_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_125_cast_fp16 = layer_norm(axes = out_125_axes_0, epsilon = var_5627_to_fp16, x = inputs_125_cast_fp16)[name = tensor("out_125_cast_fp16")]; + tensor obj_125_gamma_0_to_fp16 = const()[name = tensor("obj_125_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409444096)))]; + tensor obj_125_beta_0_to_fp16 = const()[name = tensor("obj_125_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409446720)))]; + tensor obj_125_epsilon_0_to_fp16 = const()[name = tensor("obj_125_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_125_cast_fp16 = batch_norm(beta = obj_125_beta_0_to_fp16, epsilon = obj_125_epsilon_0_to_fp16, gamma = obj_125_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_125_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_5649_pad_type_0 = const()[name = tensor("op_5649_pad_type_0"), val = tensor("valid")]; + tensor var_5649_strides_0 = const()[name = tensor("op_5649_strides_0"), val = tensor([1, 1])]; + tensor var_5649_pad_0 = const()[name = tensor("op_5649_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5649_dilations_0 = const()[name = tensor("op_5649_dilations_0"), val = tensor([1, 1])]; + tensor var_5649_groups_0 = const()[name = tensor("op_5649_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409449344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410268608))), name = tensor("layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_31_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410268736)))]; + tensor var_5649_cast_fp16 = conv(bias = layers_31_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5649_dilations_0, groups = var_5649_groups_0, pad = var_5649_pad_0, pad_type = var_5649_pad_type_0, strides = var_5649_strides_0, weight = layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_5649_cast_fp16")]; + tensor var_5655_pad_type_0 = const()[name = tensor("op_5655_pad_type_0"), val = tensor("valid")]; + tensor var_5655_strides_0 = const()[name = tensor("op_5655_strides_0"), val = tensor([1, 1])]; + tensor var_5655_pad_0 = const()[name = tensor("op_5655_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5655_dilations_0 = const()[name = tensor("op_5655_dilations_0"), val = tensor([1, 1])]; + tensor var_5655_groups_0 = const()[name = tensor("op_5655_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410290240))), name = tensor("layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410271360))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5655_cast_fp16 = conv(dilations = var_5655_dilations_0, groups = var_5655_groups_0, pad = var_5655_pad_0, pad_type = var_5655_pad_type_0, strides = var_5655_strides_0, weight = layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_5655_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_5649_cast_fp16, y = var_5655_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_5664_pad_type_0 = const()[name = tensor("op_5664_pad_type_0"), val = tensor("valid")]; + tensor var_5664_strides_0 = const()[name = tensor("op_5664_strides_0"), val = tensor([1, 1])]; + tensor var_5664_pad_0 = const()[name = tensor("op_5664_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5664_dilations_0 = const()[name = tensor("op_5664_dilations_0"), val = tensor([1, 1])]; + tensor var_5664_groups_0 = const()[name = tensor("op_5664_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410495104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411314368))), name = tensor("layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5664_cast_fp16 = conv(dilations = var_5664_dilations_0, groups = var_5664_groups_0, pad = var_5664_pad_0, pad_type = var_5664_pad_type_0, strides = var_5664_strides_0, weight = layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_5664_cast_fp16")]; + tensor var_5670_pad_type_0 = const()[name = tensor("op_5670_pad_type_0"), val = tensor("valid")]; + tensor var_5670_strides_0 = const()[name = tensor("op_5670_strides_0"), val = tensor([1, 1])]; + tensor var_5670_pad_0 = const()[name = tensor("op_5670_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5670_dilations_0 = const()[name = tensor("op_5670_dilations_0"), val = tensor([1, 1])]; + tensor var_5670_groups_0 = const()[name = tensor("op_5670_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411336768))), name = tensor("layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411314496))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5670_cast_fp16 = conv(dilations = var_5670_dilations_0, groups = var_5670_groups_0, pad = var_5670_pad_0, pad_type = var_5670_pad_type_0, strides = var_5670_strides_0, weight = layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_5670_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_5664_cast_fp16, y = var_5670_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_5680_pad_type_0 = const()[name = tensor("op_5680_pad_type_0"), val = tensor("valid")]; + tensor var_5680_strides_0 = const()[name = tensor("op_5680_strides_0"), val = tensor([1, 1])]; + tensor var_5680_pad_0 = const()[name = tensor("op_5680_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5680_dilations_0 = const()[name = tensor("op_5680_dilations_0"), val = tensor([1, 1])]; + tensor var_5680_groups_0 = const()[name = tensor("op_5680_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411541632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412360896))), name = tensor("layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_31_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412361024)))]; + tensor var_5680_cast_fp16 = conv(bias = layers_31_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5680_dilations_0, groups = var_5680_groups_0, pad = var_5680_pad_0, pad_type = var_5680_pad_type_0, strides = var_5680_strides_0, weight = layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_5680_cast_fp16")]; + tensor var_5686_pad_type_0 = const()[name = tensor("op_5686_pad_type_0"), val = tensor("valid")]; + tensor var_5686_strides_0 = const()[name = tensor("op_5686_strides_0"), val = tensor([1, 1])]; + tensor var_5686_pad_0 = const()[name = tensor("op_5686_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5686_dilations_0 = const()[name = tensor("op_5686_dilations_0"), val = tensor([1, 1])]; + tensor var_5686_groups_0 = const()[name = tensor("op_5686_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412376512))), name = tensor("layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412363648))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5686_cast_fp16 = conv(dilations = var_5686_dilations_0, groups = var_5686_groups_0, pad = var_5686_pad_0, pad_type = var_5686_pad_type_0, strides = var_5686_strides_0, weight = layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_5686_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_5680_cast_fp16, y = var_5686_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_5689 = const()[name = tensor("op_5689"), val = tensor([1, 20, 64, -1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_5689, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_5691_to_fp16 = const()[name = tensor("op_5691_to_fp16"), val = tensor(0x1p-3)]; + tensor var_5692_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_5691_to_fp16)[name = tensor("op_5692_cast_fp16")]; + tensor var_5693 = const()[name = tensor("op_5693"), val = tensor([1, 20, 64, -1])]; + tensor var_5694_cast_fp16 = reshape(shape = var_5693, x = key_cast_fp16)[name = tensor("op_5694_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_5692_cast_fp16, y = var_5694_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor var_5697_cast_fp16 = softmax(axis = var_5608, x = mh_w_cast_fp16)[name = tensor("op_5697_cast_fp16")]; + tensor var_5698 = const()[name = tensor("op_5698"), val = tensor([1, 20, 64, -1])]; + tensor var_5699_cast_fp16 = reshape(shape = var_5698, x = value_cast_fp16)[name = tensor("op_5699_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_5699_cast_fp16, y = var_5697_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_5702 = const()[name = tensor("op_5702"), val = tensor([1, 1280, 1, -1])]; + tensor input_249_cast_fp16 = reshape(shape = var_5702, x = attn_cast_fp16)[name = tensor("input_249_cast_fp16")]; + tensor var_5712_pad_type_0 = const()[name = tensor("op_5712_pad_type_0"), val = tensor("valid")]; + tensor var_5712_strides_0 = const()[name = tensor("op_5712_strides_0"), val = tensor([1, 1])]; + tensor var_5712_pad_0 = const()[name = tensor("op_5712_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5712_dilations_0 = const()[name = tensor("op_5712_dilations_0"), val = tensor([1, 1])]; + tensor var_5712_groups_0 = const()[name = tensor("op_5712_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412581376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413400640))), name = tensor("layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; + tensor layers_31_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413400768)))]; + tensor var_5712_cast_fp16 = conv(bias = layers_31_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5712_dilations_0, groups = var_5712_groups_0, pad = var_5712_pad_0, pad_type = var_5712_pad_type_0, strides = var_5712_strides_0, weight = layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = tensor("op_5712_cast_fp16")]; + tensor var_5718_pad_type_0 = const()[name = tensor("op_5718_pad_type_0"), val = tensor("valid")]; + tensor var_5718_strides_0 = const()[name = tensor("op_5718_strides_0"), val = tensor([1, 1])]; + tensor var_5718_pad_0 = const()[name = tensor("op_5718_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5718_dilations_0 = const()[name = tensor("op_5718_dilations_0"), val = tensor([1, 1])]; + tensor var_5718_groups_0 = const()[name = tensor("op_5718_groups_0"), val = tensor(1)]; + tensor layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413416000))), name = tensor("layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413403392))), shape = tensor([1280, 1280, 1, 1])]; + tensor var_5718_cast_fp16 = conv(dilations = var_5718_dilations_0, groups = var_5718_groups_0, pad = var_5718_pad_0, pad_type = var_5718_pad_type_0, strides = var_5718_strides_0, weight = layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_249_cast_fp16)[name = tensor("op_5718_cast_fp16")]; + tensor obj_cast_fp16 = add(x = var_5712_cast_fp16, y = var_5718_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_127_cast_fp16")]; + tensor out_127_axes_0 = const()[name = tensor("out_127_axes_0"), val = tensor([1])]; + tensor var_5729_to_fp16 = const()[name = tensor("op_5729_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_5729_to_fp16, x = inputs_127_cast_fp16)[name = tensor("out_127_cast_fp16")]; + tensor input_251_gamma_0_to_fp16 = const()[name = tensor("input_251_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413620864)))]; + tensor input_251_beta_0_to_fp16 = const()[name = tensor("input_251_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413623488)))]; + tensor input_251_epsilon_0_to_fp16 = const()[name = tensor("input_251_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_251_cast_fp16 = batch_norm(beta = input_251_beta_0_to_fp16, epsilon = input_251_epsilon_0_to_fp16, gamma = input_251_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_127_cast_fp16)[name = tensor("input_251_cast_fp16")]; + tensor var_5747_pad_type_0 = const()[name = tensor("op_5747_pad_type_0"), val = tensor("valid")]; + tensor var_5747_strides_0 = const()[name = tensor("op_5747_strides_0"), val = tensor([1, 1])]; + tensor var_5747_pad_0 = const()[name = tensor("op_5747_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5747_dilations_0 = const()[name = tensor("op_5747_dilations_0"), val = tensor([1, 1])]; + tensor var_5747_groups_0 = const()[name = tensor("op_5747_groups_0"), val = tensor(1)]; + tensor layers_31_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413626112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416902976))), name = tensor("layers_31_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; + tensor layers_31_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416903104)))]; + tensor var_5747_cast_fp16 = conv(bias = layers_31_fc1_inlier_module_bias_to_fp16, dilations = var_5747_dilations_0, groups = var_5747_groups_0, pad = var_5747_pad_0, pad_type = var_5747_pad_type_0, strides = var_5747_strides_0, weight = layers_31_fc1_inlier_module_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = tensor("op_5747_cast_fp16")]; + tensor var_5753_pad_type_0 = const()[name = tensor("op_5753_pad_type_0"), val = tensor("valid")]; + tensor var_5753_strides_0 = const()[name = tensor("op_5753_strides_0"), val = tensor([1, 1])]; + tensor var_5753_pad_0 = const()[name = tensor("op_5753_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5753_dilations_0 = const()[name = tensor("op_5753_dilations_0"), val = tensor([1, 1])]; + tensor var_5753_groups_0 = const()[name = tensor("op_5753_groups_0"), val = tensor(1)]; + tensor layers_31_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416975104))), name = tensor("layers_31_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416913408))), shape = tensor([5120, 1280, 1, 1])]; + tensor var_5753_cast_fp16 = conv(dilations = var_5753_dilations_0, groups = var_5753_groups_0, pad = var_5753_pad_0, pad_type = var_5753_pad_type_0, strides = var_5753_strides_0, weight = layers_31_fc1_outlier_module_weight_to_fp16_sparsified, x = input_251_cast_fp16)[name = tensor("op_5753_cast_fp16")]; + tensor input_253_cast_fp16 = add(x = var_5747_cast_fp16, y = var_5753_cast_fp16)[name = tensor("input_253_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_253_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_5764_pad_type_0 = const()[name = tensor("op_5764_pad_type_0"), val = tensor("valid")]; + tensor var_5764_strides_0 = const()[name = tensor("op_5764_strides_0"), val = tensor([1, 1])]; + tensor var_5764_pad_0 = const()[name = tensor("op_5764_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5764_dilations_0 = const()[name = tensor("op_5764_dilations_0"), val = tensor([1, 1])]; + tensor var_5764_groups_0 = const()[name = tensor("op_5764_groups_0"), val = tensor(1)]; + tensor layers_31_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417794368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421071232))), name = tensor("layers_31_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; + tensor layers_31_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421071360)))]; + tensor var_5764_cast_fp16 = conv(bias = layers_31_fc2_inlier_module_bias_to_fp16, dilations = var_5764_dilations_0, groups = var_5764_groups_0, pad = var_5764_pad_0, pad_type = var_5764_pad_type_0, strides = var_5764_strides_0, weight = layers_31_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_5764_cast_fp16")]; + tensor var_5770_pad_type_0 = const()[name = tensor("op_5770_pad_type_0"), val = tensor("valid")]; + tensor var_5770_strides_0 = const()[name = tensor("op_5770_strides_0"), val = tensor([1, 1])]; + tensor var_5770_pad_0 = const()[name = tensor("op_5770_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5770_dilations_0 = const()[name = tensor("op_5770_dilations_0"), val = tensor([1, 1])]; + tensor var_5770_groups_0 = const()[name = tensor("op_5770_groups_0"), val = tensor(1)]; + tensor layers_31_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421144256))), name = tensor("layers_31_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421073984))), shape = tensor([1280, 5120, 1, 1])]; + tensor var_5770_cast_fp16 = conv(dilations = var_5770_dilations_0, groups = var_5770_groups_0, pad = var_5770_pad_0, pad_type = var_5770_pad_type_0, strides = var_5770_strides_0, weight = layers_31_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_5770_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = var_5764_cast_fp16, y = var_5770_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_5785_to_fp16 = const()[name = tensor("op_5785_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_5785_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421963520)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421966144)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file