diff --git "a/SmolLM2-1.7B-Instruct-4bit.mlmodelc/model.mil" "b/SmolLM2-1.7B-Instruct-4bit.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/SmolLM2-1.7B-Instruct-4bit.mlmodelc/model.mil" @@ -0,0 +1,3904 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] +{ + func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { + tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_7_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_7_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(3)]; + tensor var_7_shape_cast_fp16_to_int16 = cast(dtype = var_7_shape_cast_fp16_to_int16_dtype_0, x = var_7_shape_cast_fp16)[name = string("cast_104")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_7_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_10_shape_to_uint16_dtype_0 = const()[name = string("op_10_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_10_shape_to_uint16 = cast(dtype = var_10_shape_to_uint16_dtype_0, x = var_10_shape)[name = string("cast_103")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_10_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_101")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_102")]; + int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; + int32 var_69 = const()[name = string("op_69"), val = int32(-1)]; + int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; + int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; + bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; + tensor model_model_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50331776))))[name = string("model_model_embed_tokens_weight_to_fp16_quantized")]; + tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")]; + tensor var_153_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_153_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_153_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_153_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_153_shape_cast_fp16_to_uint16 = cast(dtype = var_153_shape_cast_fp16_to_uint16_dtype_0, x = var_153_shape_cast_fp16)[name = string("cast_100")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_153_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_99")]; + int32 var_155 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_155")]; + int32 const_0 = const()[name = string("const_0"), val = int32(1)]; + tensor cache_position = range_1d(end = var_155, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; + tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; + tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; + tensor var_168_axes_0 = const()[name = string("op_168_axes_0"), val = tensor([1])]; + tensor var_168 = expand_dims(axes = var_168_axes_0, x = position_ids)[name = string("op_168")]; + bool var_173_transpose_x_0 = const()[name = string("op_173_transpose_x_0"), val = bool(false)]; + bool var_173_transpose_y_0 = const()[name = string("op_173_transpose_y_0"), val = bool(false)]; + tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56623296)))]; + string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_168_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_168)[name = string("cast_98")]; + tensor var_173_cast_fp16 = matmul(transpose_x = var_173_transpose_x_0, transpose_y = var_173_transpose_y_0, x = const_2_to_fp16, y = var_168_to_fp16)[name = string("op_173_cast_fp16")]; + tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; + bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; + tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_173_cast_fp16)[name = string("transpose_96")]; + tensor emb_cast_fp16 = concat(axis = var_69, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; + tensor cos_1_cast_fp16 = cos(x = emb_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_cast_fp16 = sin(x = emb_cast_fp16)[name = string("sin_1_cast_fp16")]; + fp16 var_64_promoted_to_fp16 = const()[name = string("op_64_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor var_194_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_64_promoted_to_fp16)[name = string("op_194_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_194_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_198_cast_fp16 = add(x = variance_1_cast_fp16, y = var_197_to_fp16)[name = string("op_198_cast_fp16")]; + fp32 var_199_epsilon_0 = const()[name = string("op_199_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_199_cast_fp16 = rsqrt(epsilon = var_199_epsilon_0, x = var_198_cast_fp16)[name = string("op_199_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_199_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56623424)))]; + tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_210_shape_cast_fp16 = shape(x = hidden_states_7_cast_fp16)[name = string("op_210_shape_cast_fp16")]; + int32 gather_4 = const()[name = string("gather_4"), val = int32(1)]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_210_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_210_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_210_shape_cast_fp16_to_uint16 = cast(dtype = var_210_shape_cast_fp16_to_uint16_dtype_0, x = var_210_shape_cast_fp16)[name = string("cast_97")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_210_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56627584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58724800))))[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58987008)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58991168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61088384))))[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61350592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63447808))))[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 32, 64])]; + tensor var_219_cast_fp16 = reshape(shape = concat_0x, x = linear_0_cast_fp16)[name = string("op_219_cast_fp16")]; + tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 32, 64])]; + tensor var_222_cast_fp16 = reshape(shape = concat_1x, x = linear_1_cast_fp16)[name = string("op_222_cast_fp16")]; + tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 32, 64])]; + tensor var_225_cast_fp16 = reshape(shape = concat_2x, x = linear_2_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; + tensor cos_7_cast_fp16 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7_cast_fp16")]; + tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; + tensor sin_7_cast_fp16 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7_cast_fp16")]; + tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_219_cast_fp16)[name = string("transpose_95")]; + tensor var_229_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_229_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_240_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_240_cast_fp16")]; + bool var_242_interleave_0 = const()[name = string("op_242_interleave_0"), val = bool(false)]; + tensor var_242_cast_fp16 = concat(axis = var_69, interleave = var_242_interleave_0, values = (var_240_cast_fp16, x1_1_cast_fp16))[name = string("op_242_cast_fp16")]; + tensor var_243_cast_fp16 = mul(x = var_242_cast_fp16, y = sin_7_cast_fp16)[name = string("op_243_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_229_cast_fp16, y = var_243_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_222_cast_fp16)[name = string("transpose_94")]; + tensor var_245_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_245_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_256_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_256_cast_fp16")]; + bool var_258_interleave_0 = const()[name = string("op_258_interleave_0"), val = bool(false)]; + tensor var_258_cast_fp16 = concat(axis = var_69, interleave = var_258_interleave_0, values = (var_256_cast_fp16, x1_3_cast_fp16))[name = string("op_258_cast_fp16")]; + tensor var_259_cast_fp16 = mul(x = var_258_cast_fp16, y = sin_7_cast_fp16)[name = string("op_259_cast_fp16")]; + tensor k_state_1_cast_fp16 = add(x = var_245_cast_fp16, y = var_259_cast_fp16)[name = string("k_state_1_cast_fp16")]; + tensor var_261_shape = shape(x = cache_position)[name = string("op_261_shape")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_261_shape_to_uint16_dtype_0 = const()[name = string("op_261_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; + tensor var_261_shape_to_uint16 = cast(dtype = var_261_shape_to_uint16_dtype_0, x = var_261_shape)[name = string("cast_96")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_261_shape_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_95")]; + int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; + tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([32])]; + tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; + tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_5")]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values4_0 = const()[name = string("concat_6_values4_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_4, expand_dims_5, concat_6_values4_0))[name = string("concat_6")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_48")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_225_cast_fp16)[name = string("transpose_93")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_49")]; + tensor var_282_begin_0 = const()[name = string("op_282_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_282_end_0 = const()[name = string("op_282_end_0"), val = tensor([1, 1, 32, 2048, 64])]; + tensor var_282_end_mask_0 = const()[name = string("op_282_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_282_squeeze_mask_0 = const()[name = string("op_282_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, squeeze_mask = var_282_squeeze_mask_0, x = coreml_update_state_48)[name = string("op_282_cast_fp16")]; + int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; + int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(32)]; + int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(64)]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; + tensor var_285_begin_0 = const()[name = string("op_285_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_end_mask_0 = const()[name = string("op_285_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = concat_11, end_mask = var_285_end_mask_0, x = var_282_cast_fp16)[name = string("op_285_cast_fp16")]; + tensor var_287_begin_0 = const()[name = string("op_287_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_287_end_0 = const()[name = string("op_287_end_0"), val = tensor([1, 1, 32, 2048, 64])]; + tensor var_287_end_mask_0 = const()[name = string("op_287_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_287_squeeze_mask_0 = const()[name = string("op_287_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, squeeze_mask = var_287_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_287_cast_fp16")]; + tensor var_290_begin_0 = const()[name = string("op_290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_290_end_mask_0 = const()[name = string("op_290_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = concat_11, end_mask = var_290_end_mask_0, x = var_287_cast_fp16)[name = string("op_290_cast_fp16")]; + tensor var_292_shape_cast_fp16 = shape(x = var_285_cast_fp16)[name = string("op_292_shape_cast_fp16")]; + int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)]; + int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)]; + bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)]; + string var_292_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_292_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(2)]; + tensor var_292_shape_cast_fp16_to_uint16 = cast(dtype = var_292_shape_cast_fp16_to_uint16_dtype_0, x = var_292_shape_cast_fp16)[name = string("cast_94")]; + uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_292_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")]; + string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_13_values0_0 = const()[name = string("concat_13_values0_0"), val = int32(1)]; + int32 concat_13_values1_0 = const()[name = string("concat_13_values1_0"), val = int32(1)]; + int32 concat_13_values2_0 = const()[name = string("concat_13_values2_0"), val = int32(0)]; + int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; + bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; + int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_93")]; + tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (concat_13_values0_0, concat_13_values1_0, concat_13_values2_0, gather_13_cast_uint16_to_int32))[name = string("concat_13")]; + tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_3_cast_fp16 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_13, end_mask = causal_mask_3_end_mask_0, x = causal_mask)[name = string("causal_mask_3_cast_fp16")]; + tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = var_285_cast_fp16, query = query_states_3_cast_fp16, value = var_290_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_298_perm_0 = const()[name = string("op_298_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_92")]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (gather_4, gather_5_cast_uint16_to_int32, var_69))[name = string("concat_14")]; + tensor var_298_cast_fp16 = transpose(perm = var_298_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_92")]; + tensor input_1_cast_fp16 = reshape(shape = concat_14, x = var_298_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63710016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65807232))))[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + fp16 var_64_promoted_1_to_fp16 = const()[name = string("op_64_promoted_1_to_fp16"), val = fp16(0x1p+1)]; + tensor var_307_cast_fp16 = pow(x = hidden_states_15_cast_fp16, y = var_64_promoted_1_to_fp16)[name = string("op_307_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_307_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_311_cast_fp16 = add(x = variance_3_cast_fp16, y = var_310_to_fp16)[name = string("op_311_cast_fp16")]; + fp32 var_312_epsilon_0 = const()[name = string("op_312_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_312_cast_fp16 = rsqrt(epsilon = var_312_epsilon_0, x = var_311_cast_fp16)[name = string("op_312_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = mul(x = hidden_states_15_cast_fp16, y = var_312_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66069440)))]; + tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_19_cast_fp16)[name = string("input_3_cast_fp16")]; + tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66073600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74462272))))[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75510912)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor var_324_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_324_cast_fp16")]; + tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75527360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83916032))))[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_324_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84964672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93353344))))[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; + fp16 var_64_promoted_2_to_fp16 = const()[name = string("op_64_promoted_2_to_fp16"), val = fp16(0x1p+1)]; + tensor var_337_cast_fp16 = pow(x = hidden_states_25_cast_fp16, y = var_64_promoted_2_to_fp16)[name = string("op_337_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_337_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_341_cast_fp16 = add(x = variance_5_cast_fp16, y = var_340_to_fp16)[name = string("op_341_cast_fp16")]; + fp32 var_342_epsilon_0 = const()[name = string("op_342_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_342_cast_fp16 = rsqrt(epsilon = var_342_epsilon_0, x = var_341_cast_fp16)[name = string("op_342_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = var_342_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94401984)))]; + tensor hidden_states_33_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor var_353_shape_cast_fp16 = shape(x = hidden_states_33_cast_fp16)[name = string("op_353_shape_cast_fp16")]; + int32 gather_14 = const()[name = string("gather_14"), val = int32(1)]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_353_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_353_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)]; + tensor var_353_shape_cast_fp16_to_uint16 = cast(dtype = var_353_shape_cast_fp16_to_uint16_dtype_0, x = var_353_shape_cast_fp16)[name = string("cast_91")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_353_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94406144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96503360))))[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_33_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96765568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98862784))))[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_33_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99124992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101222208))))[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_9_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_33_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor concat_15x = const()[name = string("concat_15x"), val = tensor([1, -1, 32, 64])]; + tensor var_362_cast_fp16 = reshape(shape = concat_15x, x = linear_7_cast_fp16)[name = string("op_362_cast_fp16")]; + tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, -1, 32, 64])]; + tensor var_365_cast_fp16 = reshape(shape = concat_16x, x = linear_8_cast_fp16)[name = string("op_365_cast_fp16")]; + tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 32, 64])]; + tensor var_368_cast_fp16 = reshape(shape = concat_17x, x = linear_9_cast_fp16)[name = string("op_368_cast_fp16")]; + tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_362_cast_fp16)[name = string("transpose_91")]; + tensor var_372_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_372_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_383_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_383_cast_fp16")]; + bool var_385_interleave_0 = const()[name = string("op_385_interleave_0"), val = bool(false)]; + tensor var_385_cast_fp16 = concat(axis = var_69, interleave = var_385_interleave_0, values = (var_383_cast_fp16, x1_5_cast_fp16))[name = string("op_385_cast_fp16")]; + tensor var_386_cast_fp16 = mul(x = var_385_cast_fp16, y = sin_7_cast_fp16)[name = string("op_386_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_372_cast_fp16, y = var_386_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_365_cast_fp16)[name = string("transpose_90")]; + tensor var_388_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_388_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_399_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_399_cast_fp16")]; + bool var_401_interleave_0 = const()[name = string("op_401_interleave_0"), val = bool(false)]; + tensor var_401_cast_fp16 = concat(axis = var_69, interleave = var_401_interleave_0, values = (var_399_cast_fp16, x1_7_cast_fp16))[name = string("op_401_cast_fp16")]; + tensor var_402_cast_fp16 = mul(x = var_401_cast_fp16, y = sin_7_cast_fp16)[name = string("op_402_cast_fp16")]; + tensor k_state_3_cast_fp16 = add(x = var_388_cast_fp16, y = var_402_cast_fp16)[name = string("k_state_3_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor concat_20_values0_0 = const()[name = string("concat_20_values0_0"), val = tensor([1])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (concat_20_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_20")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_20, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_50")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_368_cast_fp16)[name = string("transpose_89")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_20, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_51")]; + tensor var_425_begin_0 = const()[name = string("op_425_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_425_end_0 = const()[name = string("op_425_end_0"), val = tensor([2, 1, 32, 2048, 64])]; + tensor var_425_end_mask_0 = const()[name = string("op_425_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_425_squeeze_mask_0 = const()[name = string("op_425_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_425_cast_fp16 = slice_by_index(begin = var_425_begin_0, end = var_425_end_0, end_mask = var_425_end_mask_0, squeeze_mask = var_425_squeeze_mask_0, x = coreml_update_state_50)[name = string("op_425_cast_fp16")]; + tensor var_428_begin_0 = const()[name = string("op_428_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_428_end_mask_0 = const()[name = string("op_428_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = concat_11, end_mask = var_428_end_mask_0, x = var_425_cast_fp16)[name = string("op_428_cast_fp16")]; + tensor var_430_begin_0 = const()[name = string("op_430_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_430_end_0 = const()[name = string("op_430_end_0"), val = tensor([2, 1, 32, 2048, 64])]; + tensor var_430_end_mask_0 = const()[name = string("op_430_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_430_squeeze_mask_0 = const()[name = string("op_430_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_430_cast_fp16 = slice_by_index(begin = var_430_begin_0, end = var_430_end_0, end_mask = var_430_end_mask_0, squeeze_mask = var_430_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_430_cast_fp16")]; + tensor var_433_begin_0 = const()[name = string("op_433_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_433_end_mask_0 = const()[name = string("op_433_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_433_cast_fp16 = slice_by_index(begin = var_433_begin_0, end = concat_11, end_mask = var_433_end_mask_0, x = var_430_cast_fp16)[name = string("op_433_cast_fp16")]; + tensor var_435_shape_cast_fp16 = shape(x = var_428_cast_fp16)[name = string("op_435_shape_cast_fp16")]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_435_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_435_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(2)]; + tensor var_435_shape_cast_fp16_to_uint16 = cast(dtype = var_435_shape_cast_fp16_to_uint16_dtype_0, x = var_435_shape_cast_fp16)[name = string("cast_90")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_435_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_28_values0_0 = const()[name = string("concat_28_values0_0"), val = int32(1)]; + int32 concat_28_values1_0 = const()[name = string("concat_28_values1_0"), val = int32(1)]; + int32 concat_28_values2_0 = const()[name = string("concat_28_values2_0"), val = int32(0)]; + int32 concat_28_axis_0 = const()[name = string("concat_28_axis_0"), val = int32(0)]; + bool concat_28_interleave_0 = const()[name = string("concat_28_interleave_0"), val = bool(false)]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_89")]; + tensor concat_28 = concat(axis = concat_28_axis_0, interleave = concat_28_interleave_0, values = (concat_28_values0_0, concat_28_values1_0, concat_28_values2_0, gather_23_cast_uint16_to_int32))[name = string("concat_28")]; + tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_5_cast_fp16 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_28, end_mask = causal_mask_5_end_mask_0, x = causal_mask)[name = string("causal_mask_5_cast_fp16")]; + tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = var_428_cast_fp16, query = query_states_7_cast_fp16, value = var_433_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_441_perm_0 = const()[name = string("op_441_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_29_axis_0 = const()[name = string("concat_29_axis_0"), val = int32(0)]; + bool concat_29_interleave_0 = const()[name = string("concat_29_interleave_0"), val = bool(false)]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_88")]; + tensor concat_29 = concat(axis = concat_29_axis_0, interleave = concat_29_interleave_0, values = (gather_14, gather_15_cast_uint16_to_int32, var_69))[name = string("concat_29")]; + tensor var_441_cast_fp16 = transpose(perm = var_441_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_88")]; + tensor input_9_cast_fp16 = reshape(shape = concat_29, x = var_441_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101484416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103581632))))[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + fp16 var_64_promoted_3_to_fp16 = const()[name = string("op_64_promoted_3_to_fp16"), val = fp16(0x1p+1)]; + tensor var_450_cast_fp16 = pow(x = hidden_states_41_cast_fp16, y = var_64_promoted_3_to_fp16)[name = string("op_450_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_450_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_453_to_fp16 = const()[name = string("op_453_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_454_cast_fp16 = add(x = variance_7_cast_fp16, y = var_453_to_fp16)[name = string("op_454_cast_fp16")]; + fp32 var_455_epsilon_0 = const()[name = string("op_455_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_455_cast_fp16 = rsqrt(epsilon = var_455_epsilon_0, x = var_454_cast_fp16)[name = string("op_455_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = var_455_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103843840)))]; + tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_45_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103848000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112236672))))[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor var_467_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_467_cast_fp16")]; + tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113285312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121673984))))[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_467_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122722624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131111296))))[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + fp16 var_64_promoted_4_to_fp16 = const()[name = string("op_64_promoted_4_to_fp16"), val = fp16(0x1p+1)]; + tensor var_480_cast_fp16 = pow(x = hidden_states_51_cast_fp16, y = var_64_promoted_4_to_fp16)[name = string("op_480_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_480_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_483_to_fp16 = const()[name = string("op_483_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_484_cast_fp16 = add(x = variance_9_cast_fp16, y = var_483_to_fp16)[name = string("op_484_cast_fp16")]; + fp32 var_485_epsilon_0 = const()[name = string("op_485_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_485_cast_fp16 = rsqrt(epsilon = var_485_epsilon_0, x = var_484_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor hidden_states_55_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = var_485_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132159936)))]; + tensor hidden_states_59_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_55_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor var_496_shape_cast_fp16 = shape(x = hidden_states_59_cast_fp16)[name = string("op_496_shape_cast_fp16")]; + int32 gather_24 = const()[name = string("gather_24"), val = int32(1)]; + int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)]; + int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)]; + bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)]; + string var_496_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_496_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)]; + tensor var_496_shape_cast_fp16_to_uint16 = cast(dtype = var_496_shape_cast_fp16_to_uint16_dtype_0, x = var_496_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_496_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")]; + string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132164096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134261312))))[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_59_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134523520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136620736))))[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_59_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136882944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138980160))))[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_59_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor concat_30x = const()[name = string("concat_30x"), val = tensor([1, -1, 32, 64])]; + tensor var_505_cast_fp16 = reshape(shape = concat_30x, x = linear_14_cast_fp16)[name = string("op_505_cast_fp16")]; + tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_31x = const()[name = string("concat_31x"), val = tensor([1, -1, 32, 64])]; + tensor var_508_cast_fp16 = reshape(shape = concat_31x, x = linear_15_cast_fp16)[name = string("op_508_cast_fp16")]; + tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_32x = const()[name = string("concat_32x"), val = tensor([1, -1, 32, 64])]; + tensor var_511_cast_fp16 = reshape(shape = concat_32x, x = linear_16_cast_fp16)[name = string("op_511_cast_fp16")]; + tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_505_cast_fp16)[name = string("transpose_87")]; + tensor var_515_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_515_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_526_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_526_cast_fp16")]; + bool var_528_interleave_0 = const()[name = string("op_528_interleave_0"), val = bool(false)]; + tensor var_528_cast_fp16 = concat(axis = var_69, interleave = var_528_interleave_0, values = (var_526_cast_fp16, x1_9_cast_fp16))[name = string("op_528_cast_fp16")]; + tensor var_529_cast_fp16 = mul(x = var_528_cast_fp16, y = sin_7_cast_fp16)[name = string("op_529_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_515_cast_fp16, y = var_529_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_508_cast_fp16)[name = string("transpose_86")]; + tensor var_531_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_531_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_542_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_542_cast_fp16")]; + bool var_544_interleave_0 = const()[name = string("op_544_interleave_0"), val = bool(false)]; + tensor var_544_cast_fp16 = concat(axis = var_69, interleave = var_544_interleave_0, values = (var_542_cast_fp16, x1_11_cast_fp16))[name = string("op_544_cast_fp16")]; + tensor var_545_cast_fp16 = mul(x = var_544_cast_fp16, y = sin_7_cast_fp16)[name = string("op_545_cast_fp16")]; + tensor k_state_5_cast_fp16 = add(x = var_531_cast_fp16, y = var_545_cast_fp16)[name = string("k_state_5_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor concat_35_values0_0 = const()[name = string("concat_35_values0_0"), val = tensor([2])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (concat_35_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_35")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_35, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_52")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_511_cast_fp16)[name = string("transpose_85")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_35, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_53")]; + tensor var_568_begin_0 = const()[name = string("op_568_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_568_end_0 = const()[name = string("op_568_end_0"), val = tensor([3, 1, 32, 2048, 64])]; + tensor var_568_end_mask_0 = const()[name = string("op_568_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_568_squeeze_mask_0 = const()[name = string("op_568_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_568_cast_fp16 = slice_by_index(begin = var_568_begin_0, end = var_568_end_0, end_mask = var_568_end_mask_0, squeeze_mask = var_568_squeeze_mask_0, x = coreml_update_state_52)[name = string("op_568_cast_fp16")]; + tensor var_571_begin_0 = const()[name = string("op_571_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_571_end_mask_0 = const()[name = string("op_571_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_571_cast_fp16 = slice_by_index(begin = var_571_begin_0, end = concat_11, end_mask = var_571_end_mask_0, x = var_568_cast_fp16)[name = string("op_571_cast_fp16")]; + tensor var_573_begin_0 = const()[name = string("op_573_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_573_end_0 = const()[name = string("op_573_end_0"), val = tensor([3, 1, 32, 2048, 64])]; + tensor var_573_end_mask_0 = const()[name = string("op_573_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_573_squeeze_mask_0 = const()[name = string("op_573_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_573_cast_fp16 = slice_by_index(begin = var_573_begin_0, end = var_573_end_0, end_mask = var_573_end_mask_0, squeeze_mask = var_573_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_573_cast_fp16")]; + tensor var_576_begin_0 = const()[name = string("op_576_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_576_end_mask_0 = const()[name = string("op_576_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_576_cast_fp16 = slice_by_index(begin = var_576_begin_0, end = concat_11, end_mask = var_576_end_mask_0, x = var_573_cast_fp16)[name = string("op_576_cast_fp16")]; + tensor var_578_shape_cast_fp16 = shape(x = var_571_cast_fp16)[name = string("op_578_shape_cast_fp16")]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_578_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_578_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; + tensor var_578_shape_cast_fp16_to_uint16 = cast(dtype = var_578_shape_cast_fp16_to_uint16_dtype_0, x = var_578_shape_cast_fp16)[name = string("cast_86")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_578_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_43_values0_0 = const()[name = string("concat_43_values0_0"), val = int32(1)]; + int32 concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = int32(1)]; + int32 concat_43_values2_0 = const()[name = string("concat_43_values2_0"), val = int32(0)]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_85")]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (concat_43_values0_0, concat_43_values1_0, concat_43_values2_0, gather_33_cast_uint16_to_int32))[name = string("concat_43")]; + tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_7_cast_fp16 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_43, end_mask = causal_mask_7_end_mask_0, x = causal_mask)[name = string("causal_mask_7_cast_fp16")]; + tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = var_571_cast_fp16, query = query_states_11_cast_fp16, value = var_576_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_584_perm_0 = const()[name = string("op_584_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; + bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; + int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_84")]; + tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (gather_24, gather_25_cast_uint16_to_int32, var_69))[name = string("concat_44")]; + tensor var_584_cast_fp16 = transpose(perm = var_584_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_84")]; + tensor input_17_cast_fp16 = reshape(shape = concat_44, x = var_584_cast_fp16)[name = string("input_17_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139242368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141339584))))[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_17_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized, x = input_17_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor hidden_states_67_cast_fp16 = add(x = hidden_states_51_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; + fp16 var_64_promoted_5_to_fp16 = const()[name = string("op_64_promoted_5_to_fp16"), val = fp16(0x1p+1)]; + tensor var_593_cast_fp16 = pow(x = hidden_states_67_cast_fp16, y = var_64_promoted_5_to_fp16)[name = string("op_593_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_593_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_596_to_fp16 = const()[name = string("op_596_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_597_cast_fp16 = add(x = variance_11_cast_fp16, y = var_596_to_fp16)[name = string("op_597_cast_fp16")]; + fp32 var_598_epsilon_0 = const()[name = string("op_598_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_598_cast_fp16 = rsqrt(epsilon = var_598_epsilon_0, x = var_597_cast_fp16)[name = string("op_598_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = mul(x = hidden_states_67_cast_fp16, y = var_598_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141601792)))]; + tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_71_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141605952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149994624))))[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_610_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_610_cast_fp16")]; + tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151043264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159431936))))[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_610_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; + tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160480576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168869248))))[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor hidden_states_77_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; + fp16 var_64_promoted_6_to_fp16 = const()[name = string("op_64_promoted_6_to_fp16"), val = fp16(0x1p+1)]; + tensor var_623_cast_fp16 = pow(x = hidden_states_77_cast_fp16, y = var_64_promoted_6_to_fp16)[name = string("op_623_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_623_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_626_to_fp16 = const()[name = string("op_626_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_627_cast_fp16 = add(x = variance_13_cast_fp16, y = var_626_to_fp16)[name = string("op_627_cast_fp16")]; + fp32 var_628_epsilon_0 = const()[name = string("op_628_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_628_cast_fp16 = rsqrt(epsilon = var_628_epsilon_0, x = var_627_cast_fp16)[name = string("op_628_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = var_628_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169917888)))]; + tensor hidden_states_85_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_81_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; + tensor var_639_shape_cast_fp16 = shape(x = hidden_states_85_cast_fp16)[name = string("op_639_shape_cast_fp16")]; + int32 gather_34 = const()[name = string("gather_34"), val = int32(1)]; + int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)]; + int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)]; + bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)]; + string var_639_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_639_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)]; + tensor var_639_shape_cast_fp16_to_uint16 = cast(dtype = var_639_shape_cast_fp16_to_uint16_dtype_0, x = var_639_shape_cast_fp16)[name = string("cast_83")]; + uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_639_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")]; + string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169922048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172019264))))[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_85_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172281472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174378688))))[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_85_cast_fp16)[name = string("linear_22_cast_fp16")]; + tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174640896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176738112))))[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_23_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_85_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 32, 64])]; + tensor var_648_cast_fp16 = reshape(shape = concat_45x, x = linear_21_cast_fp16)[name = string("op_648_cast_fp16")]; + tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_46x = const()[name = string("concat_46x"), val = tensor([1, -1, 32, 64])]; + tensor var_651_cast_fp16 = reshape(shape = concat_46x, x = linear_22_cast_fp16)[name = string("op_651_cast_fp16")]; + tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_47x = const()[name = string("concat_47x"), val = tensor([1, -1, 32, 64])]; + tensor var_654_cast_fp16 = reshape(shape = concat_47x, x = linear_23_cast_fp16)[name = string("op_654_cast_fp16")]; + tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_648_cast_fp16)[name = string("transpose_83")]; + tensor var_658_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_658_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_669_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_669_cast_fp16")]; + bool var_671_interleave_0 = const()[name = string("op_671_interleave_0"), val = bool(false)]; + tensor var_671_cast_fp16 = concat(axis = var_69, interleave = var_671_interleave_0, values = (var_669_cast_fp16, x1_13_cast_fp16))[name = string("op_671_cast_fp16")]; + tensor var_672_cast_fp16 = mul(x = var_671_cast_fp16, y = sin_7_cast_fp16)[name = string("op_672_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_658_cast_fp16, y = var_672_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_651_cast_fp16)[name = string("transpose_82")]; + tensor var_674_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_674_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_685_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_685_cast_fp16")]; + bool var_687_interleave_0 = const()[name = string("op_687_interleave_0"), val = bool(false)]; + tensor var_687_cast_fp16 = concat(axis = var_69, interleave = var_687_interleave_0, values = (var_685_cast_fp16, x1_15_cast_fp16))[name = string("op_687_cast_fp16")]; + tensor var_688_cast_fp16 = mul(x = var_687_cast_fp16, y = sin_7_cast_fp16)[name = string("op_688_cast_fp16")]; + tensor k_state_7_cast_fp16 = add(x = var_674_cast_fp16, y = var_688_cast_fp16)[name = string("k_state_7_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor concat_50_values0_0 = const()[name = string("concat_50_values0_0"), val = tensor([3])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (concat_50_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_50")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_50, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_54")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_654_cast_fp16)[name = string("transpose_81")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_50, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_55")]; + tensor var_711_begin_0 = const()[name = string("op_711_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_711_end_0 = const()[name = string("op_711_end_0"), val = tensor([4, 1, 32, 2048, 64])]; + tensor var_711_end_mask_0 = const()[name = string("op_711_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_711_squeeze_mask_0 = const()[name = string("op_711_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_711_cast_fp16 = slice_by_index(begin = var_711_begin_0, end = var_711_end_0, end_mask = var_711_end_mask_0, squeeze_mask = var_711_squeeze_mask_0, x = coreml_update_state_54)[name = string("op_711_cast_fp16")]; + tensor var_714_begin_0 = const()[name = string("op_714_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_714_end_mask_0 = const()[name = string("op_714_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_714_cast_fp16 = slice_by_index(begin = var_714_begin_0, end = concat_11, end_mask = var_714_end_mask_0, x = var_711_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor var_716_begin_0 = const()[name = string("op_716_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_716_end_0 = const()[name = string("op_716_end_0"), val = tensor([4, 1, 32, 2048, 64])]; + tensor var_716_end_mask_0 = const()[name = string("op_716_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_716_squeeze_mask_0 = const()[name = string("op_716_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_716_cast_fp16 = slice_by_index(begin = var_716_begin_0, end = var_716_end_0, end_mask = var_716_end_mask_0, squeeze_mask = var_716_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_716_cast_fp16")]; + tensor var_719_begin_0 = const()[name = string("op_719_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_719_end_mask_0 = const()[name = string("op_719_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_719_cast_fp16 = slice_by_index(begin = var_719_begin_0, end = concat_11, end_mask = var_719_end_mask_0, x = var_716_cast_fp16)[name = string("op_719_cast_fp16")]; + tensor var_721_shape_cast_fp16 = shape(x = var_714_cast_fp16)[name = string("op_721_shape_cast_fp16")]; + int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)]; + int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)]; + bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)]; + string var_721_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_721_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(2)]; + tensor var_721_shape_cast_fp16_to_uint16 = cast(dtype = var_721_shape_cast_fp16_to_uint16_dtype_0, x = var_721_shape_cast_fp16)[name = string("cast_82")]; + uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_721_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")]; + string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_58_values0_0 = const()[name = string("concat_58_values0_0"), val = int32(1)]; + int32 concat_58_values1_0 = const()[name = string("concat_58_values1_0"), val = int32(1)]; + int32 concat_58_values2_0 = const()[name = string("concat_58_values2_0"), val = int32(0)]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_81")]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (concat_58_values0_0, concat_58_values1_0, concat_58_values2_0, gather_43_cast_uint16_to_int32))[name = string("concat_58")]; + tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_9_cast_fp16 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_58, end_mask = causal_mask_9_end_mask_0, x = causal_mask)[name = string("causal_mask_9_cast_fp16")]; + tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = var_714_cast_fp16, query = query_states_15_cast_fp16, value = var_719_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_727_perm_0 = const()[name = string("op_727_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_80")]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_34, gather_35_cast_uint16_to_int32, var_69))[name = string("concat_59")]; + tensor var_727_cast_fp16 = transpose(perm = var_727_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_80")]; + tensor input_25_cast_fp16 = reshape(shape = concat_59, x = var_727_cast_fp16)[name = string("input_25_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177000320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179097536))))[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = add(x = hidden_states_77_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + fp16 var_64_promoted_7_to_fp16 = const()[name = string("op_64_promoted_7_to_fp16"), val = fp16(0x1p+1)]; + tensor var_736_cast_fp16 = pow(x = hidden_states_93_cast_fp16, y = var_64_promoted_7_to_fp16)[name = string("op_736_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_736_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_740_cast_fp16 = add(x = variance_15_cast_fp16, y = var_739_to_fp16)[name = string("op_740_cast_fp16")]; + fp32 var_741_epsilon_0 = const()[name = string("op_741_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_741_cast_fp16 = rsqrt(epsilon = var_741_epsilon_0, x = var_740_cast_fp16)[name = string("op_741_cast_fp16")]; + tensor hidden_states_97_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = var_741_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179359744)))]; + tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_97_cast_fp16)[name = string("input_27_cast_fp16")]; + tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179363904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187752576))))[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_753_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_753_cast_fp16")]; + tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188801216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197189888))))[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_753_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; + tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198238528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206627200))))[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_27_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor hidden_states_103_cast_fp16 = add(x = hidden_states_93_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; + fp16 var_64_promoted_8_to_fp16 = const()[name = string("op_64_promoted_8_to_fp16"), val = fp16(0x1p+1)]; + tensor var_766_cast_fp16 = pow(x = hidden_states_103_cast_fp16, y = var_64_promoted_8_to_fp16)[name = string("op_766_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_766_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_769_to_fp16 = const()[name = string("op_769_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_770_cast_fp16 = add(x = variance_17_cast_fp16, y = var_769_to_fp16)[name = string("op_770_cast_fp16")]; + fp32 var_771_epsilon_0 = const()[name = string("op_771_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_771_cast_fp16 = rsqrt(epsilon = var_771_epsilon_0, x = var_770_cast_fp16)[name = string("op_771_cast_fp16")]; + tensor hidden_states_107_cast_fp16 = mul(x = hidden_states_103_cast_fp16, y = var_771_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207675840)))]; + tensor hidden_states_111_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_107_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor var_782_shape_cast_fp16 = shape(x = hidden_states_111_cast_fp16)[name = string("op_782_shape_cast_fp16")]; + int32 gather_44 = const()[name = string("gather_44"), val = int32(1)]; + int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)]; + int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)]; + bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)]; + string var_782_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_782_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)]; + tensor var_782_shape_cast_fp16_to_uint16 = cast(dtype = var_782_shape_cast_fp16_to_uint16_dtype_0, x = var_782_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_782_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")]; + string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207680000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209777216))))[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_111_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210039424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212136640))))[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_29_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_111_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212398848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214496064))))[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_111_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor concat_60x = const()[name = string("concat_60x"), val = tensor([1, -1, 32, 64])]; + tensor var_791_cast_fp16 = reshape(shape = concat_60x, x = linear_28_cast_fp16)[name = string("op_791_cast_fp16")]; + tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 32, 64])]; + tensor var_794_cast_fp16 = reshape(shape = concat_61x, x = linear_29_cast_fp16)[name = string("op_794_cast_fp16")]; + tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_62x = const()[name = string("concat_62x"), val = tensor([1, -1, 32, 64])]; + tensor var_797_cast_fp16 = reshape(shape = concat_62x, x = linear_30_cast_fp16)[name = string("op_797_cast_fp16")]; + tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_791_cast_fp16)[name = string("transpose_79")]; + tensor var_801_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_801_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_812_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_812_cast_fp16")]; + bool var_814_interleave_0 = const()[name = string("op_814_interleave_0"), val = bool(false)]; + tensor var_814_cast_fp16 = concat(axis = var_69, interleave = var_814_interleave_0, values = (var_812_cast_fp16, x1_17_cast_fp16))[name = string("op_814_cast_fp16")]; + tensor var_815_cast_fp16 = mul(x = var_814_cast_fp16, y = sin_7_cast_fp16)[name = string("op_815_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_801_cast_fp16, y = var_815_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_794_cast_fp16)[name = string("transpose_78")]; + tensor var_817_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_817_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_828_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_828_cast_fp16")]; + bool var_830_interleave_0 = const()[name = string("op_830_interleave_0"), val = bool(false)]; + tensor var_830_cast_fp16 = concat(axis = var_69, interleave = var_830_interleave_0, values = (var_828_cast_fp16, x1_19_cast_fp16))[name = string("op_830_cast_fp16")]; + tensor var_831_cast_fp16 = mul(x = var_830_cast_fp16, y = sin_7_cast_fp16)[name = string("op_831_cast_fp16")]; + tensor k_state_9_cast_fp16 = add(x = var_817_cast_fp16, y = var_831_cast_fp16)[name = string("k_state_9_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor concat_65_values0_0 = const()[name = string("concat_65_values0_0"), val = tensor([4])]; + int32 concat_65_axis_0 = const()[name = string("concat_65_axis_0"), val = int32(0)]; + bool concat_65_interleave_0 = const()[name = string("concat_65_interleave_0"), val = bool(false)]; + tensor concat_65 = concat(axis = concat_65_axis_0, interleave = concat_65_interleave_0, values = (concat_65_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_65")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_65, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_56")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_797_cast_fp16)[name = string("transpose_77")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_65, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_57")]; + tensor var_854_begin_0 = const()[name = string("op_854_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_854_end_0 = const()[name = string("op_854_end_0"), val = tensor([5, 1, 32, 2048, 64])]; + tensor var_854_end_mask_0 = const()[name = string("op_854_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_854_squeeze_mask_0 = const()[name = string("op_854_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_854_cast_fp16 = slice_by_index(begin = var_854_begin_0, end = var_854_end_0, end_mask = var_854_end_mask_0, squeeze_mask = var_854_squeeze_mask_0, x = coreml_update_state_56)[name = string("op_854_cast_fp16")]; + tensor var_857_begin_0 = const()[name = string("op_857_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_857_end_mask_0 = const()[name = string("op_857_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_857_cast_fp16 = slice_by_index(begin = var_857_begin_0, end = concat_11, end_mask = var_857_end_mask_0, x = var_854_cast_fp16)[name = string("op_857_cast_fp16")]; + tensor var_859_begin_0 = const()[name = string("op_859_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_859_end_0 = const()[name = string("op_859_end_0"), val = tensor([5, 1, 32, 2048, 64])]; + tensor var_859_end_mask_0 = const()[name = string("op_859_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_859_squeeze_mask_0 = const()[name = string("op_859_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_859_cast_fp16 = slice_by_index(begin = var_859_begin_0, end = var_859_end_0, end_mask = var_859_end_mask_0, squeeze_mask = var_859_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_859_cast_fp16")]; + tensor var_862_begin_0 = const()[name = string("op_862_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_862_end_mask_0 = const()[name = string("op_862_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_862_cast_fp16 = slice_by_index(begin = var_862_begin_0, end = concat_11, end_mask = var_862_end_mask_0, x = var_859_cast_fp16)[name = string("op_862_cast_fp16")]; + tensor var_864_shape_cast_fp16 = shape(x = var_857_cast_fp16)[name = string("op_864_shape_cast_fp16")]; + int32 gather_53_axis_0 = const()[name = string("gather_53_axis_0"), val = int32(0)]; + int32 gather_53_batch_dims_0 = const()[name = string("gather_53_batch_dims_0"), val = int32(0)]; + bool gather_53_validate_indices_0 = const()[name = string("gather_53_validate_indices_0"), val = bool(false)]; + string var_864_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_864_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_53_to_uint16 = const()[name = string("select_53_to_uint16"), val = uint16(2)]; + tensor var_864_shape_cast_fp16_to_uint16 = cast(dtype = var_864_shape_cast_fp16_to_uint16_dtype_0, x = var_864_shape_cast_fp16)[name = string("cast_78")]; + uint16 gather_53_cast_uint16 = gather(axis = gather_53_axis_0, batch_dims = gather_53_batch_dims_0, indices = select_53_to_uint16, validate_indices = gather_53_validate_indices_0, x = var_864_shape_cast_fp16_to_uint16)[name = string("gather_53_cast_uint16")]; + string gather_53_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_53_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_73_values0_0 = const()[name = string("concat_73_values0_0"), val = int32(1)]; + int32 concat_73_values1_0 = const()[name = string("concat_73_values1_0"), val = int32(1)]; + int32 concat_73_values2_0 = const()[name = string("concat_73_values2_0"), val = int32(0)]; + int32 concat_73_axis_0 = const()[name = string("concat_73_axis_0"), val = int32(0)]; + bool concat_73_interleave_0 = const()[name = string("concat_73_interleave_0"), val = bool(false)]; + int32 gather_53_cast_uint16_to_int32 = cast(dtype = gather_53_cast_uint16_to_int32_dtype_0, x = gather_53_cast_uint16)[name = string("cast_77")]; + tensor concat_73 = concat(axis = concat_73_axis_0, interleave = concat_73_interleave_0, values = (concat_73_values0_0, concat_73_values1_0, concat_73_values2_0, gather_53_cast_uint16_to_int32))[name = string("concat_73")]; + tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_11_cast_fp16 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_73, end_mask = causal_mask_11_end_mask_0, x = causal_mask)[name = string("causal_mask_11_cast_fp16")]; + tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = var_857_cast_fp16, query = query_states_19_cast_fp16, value = var_862_cast_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor var_870_perm_0 = const()[name = string("op_870_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_76")]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (gather_44, gather_45_cast_uint16_to_int32, var_69))[name = string("concat_74")]; + tensor var_870_cast_fp16 = transpose(perm = var_870_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_76")]; + tensor input_33_cast_fp16 = reshape(shape = concat_74, x = var_870_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214758272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216855488))))[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_103_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + fp16 var_64_promoted_9_to_fp16 = const()[name = string("op_64_promoted_9_to_fp16"), val = fp16(0x1p+1)]; + tensor var_879_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_64_promoted_9_to_fp16)[name = string("op_879_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_879_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_882_to_fp16 = const()[name = string("op_882_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_883_cast_fp16 = add(x = variance_19_cast_fp16, y = var_882_to_fp16)[name = string("op_883_cast_fp16")]; + fp32 var_884_epsilon_0 = const()[name = string("op_884_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_884_cast_fp16 = rsqrt(epsilon = var_884_epsilon_0, x = var_883_cast_fp16)[name = string("op_884_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_884_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217117696)))]; + tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_123_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217121856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225510528))))[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_896_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_896_cast_fp16")]; + tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226559168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234947840))))[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_896_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235996480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244385152))))[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor hidden_states_129_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; + fp16 var_64_promoted_10_to_fp16 = const()[name = string("op_64_promoted_10_to_fp16"), val = fp16(0x1p+1)]; + tensor var_909_cast_fp16 = pow(x = hidden_states_129_cast_fp16, y = var_64_promoted_10_to_fp16)[name = string("op_909_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_909_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_913_cast_fp16 = add(x = variance_21_cast_fp16, y = var_912_to_fp16)[name = string("op_913_cast_fp16")]; + fp32 var_914_epsilon_0 = const()[name = string("op_914_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_914_cast_fp16 = rsqrt(epsilon = var_914_epsilon_0, x = var_913_cast_fp16)[name = string("op_914_cast_fp16")]; + tensor hidden_states_133_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = var_914_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245433792)))]; + tensor hidden_states_137_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_133_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; + tensor var_925_shape_cast_fp16 = shape(x = hidden_states_137_cast_fp16)[name = string("op_925_shape_cast_fp16")]; + int32 gather_54 = const()[name = string("gather_54"), val = int32(1)]; + int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; + int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; + bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; + string var_925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(1)]; + tensor var_925_shape_cast_fp16_to_uint16 = cast(dtype = var_925_shape_cast_fp16_to_uint16_dtype_0, x = var_925_shape_cast_fp16)[name = string("cast_75")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_925_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; + string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245437952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247535168))))[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_137_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247797376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249894592))))[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_137_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250156800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252254016))))[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_37_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_137_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor concat_75x = const()[name = string("concat_75x"), val = tensor([1, -1, 32, 64])]; + tensor var_934_cast_fp16 = reshape(shape = concat_75x, x = linear_35_cast_fp16)[name = string("op_934_cast_fp16")]; + tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 32, 64])]; + tensor var_937_cast_fp16 = reshape(shape = concat_76x, x = linear_36_cast_fp16)[name = string("op_937_cast_fp16")]; + tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 32, 64])]; + tensor var_940_cast_fp16 = reshape(shape = concat_77x, x = linear_37_cast_fp16)[name = string("op_940_cast_fp16")]; + tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_934_cast_fp16)[name = string("transpose_75")]; + tensor var_944_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_944_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_955_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_955_cast_fp16")]; + bool var_957_interleave_0 = const()[name = string("op_957_interleave_0"), val = bool(false)]; + tensor var_957_cast_fp16 = concat(axis = var_69, interleave = var_957_interleave_0, values = (var_955_cast_fp16, x1_21_cast_fp16))[name = string("op_957_cast_fp16")]; + tensor var_958_cast_fp16 = mul(x = var_957_cast_fp16, y = sin_7_cast_fp16)[name = string("op_958_cast_fp16")]; + tensor query_states_23_cast_fp16 = add(x = var_944_cast_fp16, y = var_958_cast_fp16)[name = string("query_states_23_cast_fp16")]; + tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_937_cast_fp16)[name = string("transpose_74")]; + tensor var_960_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_960_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_971_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_971_cast_fp16")]; + bool var_973_interleave_0 = const()[name = string("op_973_interleave_0"), val = bool(false)]; + tensor var_973_cast_fp16 = concat(axis = var_69, interleave = var_973_interleave_0, values = (var_971_cast_fp16, x1_23_cast_fp16))[name = string("op_973_cast_fp16")]; + tensor var_974_cast_fp16 = mul(x = var_973_cast_fp16, y = sin_7_cast_fp16)[name = string("op_974_cast_fp16")]; + tensor k_state_11_cast_fp16 = add(x = var_960_cast_fp16, y = var_974_cast_fp16)[name = string("k_state_11_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor concat_80_values0_0 = const()[name = string("concat_80_values0_0"), val = tensor([5])]; + int32 concat_80_axis_0 = const()[name = string("concat_80_axis_0"), val = int32(0)]; + bool concat_80_interleave_0 = const()[name = string("concat_80_interleave_0"), val = bool(false)]; + tensor concat_80 = concat(axis = concat_80_axis_0, interleave = concat_80_interleave_0, values = (concat_80_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_80")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_80, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_58")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_940_cast_fp16)[name = string("transpose_73")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_80, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_59")]; + tensor var_997_begin_0 = const()[name = string("op_997_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_997_end_0 = const()[name = string("op_997_end_0"), val = tensor([6, 1, 32, 2048, 64])]; + tensor var_997_end_mask_0 = const()[name = string("op_997_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_997_squeeze_mask_0 = const()[name = string("op_997_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_997_cast_fp16 = slice_by_index(begin = var_997_begin_0, end = var_997_end_0, end_mask = var_997_end_mask_0, squeeze_mask = var_997_squeeze_mask_0, x = coreml_update_state_58)[name = string("op_997_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = string("op_1000_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1000_end_mask_0 = const()[name = string("op_1000_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = concat_11, end_mask = var_1000_end_mask_0, x = var_997_cast_fp16)[name = string("op_1000_cast_fp16")]; + tensor var_1002_begin_0 = const()[name = string("op_1002_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1002_end_0 = const()[name = string("op_1002_end_0"), val = tensor([6, 1, 32, 2048, 64])]; + tensor var_1002_end_mask_0 = const()[name = string("op_1002_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1002_squeeze_mask_0 = const()[name = string("op_1002_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1002_cast_fp16 = slice_by_index(begin = var_1002_begin_0, end = var_1002_end_0, end_mask = var_1002_end_mask_0, squeeze_mask = var_1002_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_1002_cast_fp16")]; + tensor var_1005_begin_0 = const()[name = string("op_1005_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1005_end_mask_0 = const()[name = string("op_1005_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1005_cast_fp16 = slice_by_index(begin = var_1005_begin_0, end = concat_11, end_mask = var_1005_end_mask_0, x = var_1002_cast_fp16)[name = string("op_1005_cast_fp16")]; + tensor var_1007_shape_cast_fp16 = shape(x = var_1000_cast_fp16)[name = string("op_1007_shape_cast_fp16")]; + int32 gather_63_axis_0 = const()[name = string("gather_63_axis_0"), val = int32(0)]; + int32 gather_63_batch_dims_0 = const()[name = string("gather_63_batch_dims_0"), val = int32(0)]; + bool gather_63_validate_indices_0 = const()[name = string("gather_63_validate_indices_0"), val = bool(false)]; + string var_1007_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1007_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_63_to_uint16 = const()[name = string("select_63_to_uint16"), val = uint16(2)]; + tensor var_1007_shape_cast_fp16_to_uint16 = cast(dtype = var_1007_shape_cast_fp16_to_uint16_dtype_0, x = var_1007_shape_cast_fp16)[name = string("cast_74")]; + uint16 gather_63_cast_uint16 = gather(axis = gather_63_axis_0, batch_dims = gather_63_batch_dims_0, indices = select_63_to_uint16, validate_indices = gather_63_validate_indices_0, x = var_1007_shape_cast_fp16_to_uint16)[name = string("gather_63_cast_uint16")]; + string gather_63_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_63_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_88_values0_0 = const()[name = string("concat_88_values0_0"), val = int32(1)]; + int32 concat_88_values1_0 = const()[name = string("concat_88_values1_0"), val = int32(1)]; + int32 concat_88_values2_0 = const()[name = string("concat_88_values2_0"), val = int32(0)]; + int32 concat_88_axis_0 = const()[name = string("concat_88_axis_0"), val = int32(0)]; + bool concat_88_interleave_0 = const()[name = string("concat_88_interleave_0"), val = bool(false)]; + int32 gather_63_cast_uint16_to_int32 = cast(dtype = gather_63_cast_uint16_to_int32_dtype_0, x = gather_63_cast_uint16)[name = string("cast_73")]; + tensor concat_88 = concat(axis = concat_88_axis_0, interleave = concat_88_interleave_0, values = (concat_88_values0_0, concat_88_values1_0, concat_88_values2_0, gather_63_cast_uint16_to_int32))[name = string("concat_88")]; + tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_13_cast_fp16 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_88, end_mask = causal_mask_13_end_mask_0, x = causal_mask)[name = string("causal_mask_13_cast_fp16")]; + tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = var_1000_cast_fp16, query = query_states_23_cast_fp16, value = var_1005_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1013_perm_0 = const()[name = string("op_1013_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; + bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; + int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_72")]; + tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_54, gather_55_cast_uint16_to_int32, var_69))[name = string("concat_89")]; + tensor var_1013_cast_fp16 = transpose(perm = var_1013_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_72")]; + tensor input_41_cast_fp16 = reshape(shape = concat_89, x = var_1013_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252516224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254613440))))[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor hidden_states_145_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; + fp16 var_64_promoted_11_to_fp16 = const()[name = string("op_64_promoted_11_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1022_cast_fp16 = pow(x = hidden_states_145_cast_fp16, y = var_64_promoted_11_to_fp16)[name = string("op_1022_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1022_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1026_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1025_to_fp16)[name = string("op_1026_cast_fp16")]; + fp32 var_1027_epsilon_0 = const()[name = string("op_1027_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1027_cast_fp16 = rsqrt(epsilon = var_1027_epsilon_0, x = var_1026_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = var_1027_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254875648)))]; + tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_149_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254879808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263268480))))[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor var_1039_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1039_cast_fp16")]; + tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264317120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272705792))))[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_1039_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273754432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282143104))))[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_41_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor hidden_states_155_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; + fp16 var_64_promoted_12_to_fp16 = const()[name = string("op_64_promoted_12_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1052_cast_fp16 = pow(x = hidden_states_155_cast_fp16, y = var_64_promoted_12_to_fp16)[name = string("op_1052_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1052_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1055_to_fp16 = const()[name = string("op_1055_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1056_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1055_to_fp16)[name = string("op_1056_cast_fp16")]; + fp32 var_1057_epsilon_0 = const()[name = string("op_1057_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1057_cast_fp16 = rsqrt(epsilon = var_1057_epsilon_0, x = var_1056_cast_fp16)[name = string("op_1057_cast_fp16")]; + tensor hidden_states_159_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = var_1057_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283191744)))]; + tensor hidden_states_163_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor var_1068_shape_cast_fp16 = shape(x = hidden_states_163_cast_fp16)[name = string("op_1068_shape_cast_fp16")]; + int32 gather_64 = const()[name = string("gather_64"), val = int32(1)]; + int32 gather_65_axis_0 = const()[name = string("gather_65_axis_0"), val = int32(0)]; + int32 gather_65_batch_dims_0 = const()[name = string("gather_65_batch_dims_0"), val = int32(0)]; + bool gather_65_validate_indices_0 = const()[name = string("gather_65_validate_indices_0"), val = bool(false)]; + string var_1068_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1068_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_65_to_uint16 = const()[name = string("select_65_to_uint16"), val = uint16(1)]; + tensor var_1068_shape_cast_fp16_to_uint16 = cast(dtype = var_1068_shape_cast_fp16_to_uint16_dtype_0, x = var_1068_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_65_cast_uint16 = gather(axis = gather_65_axis_0, batch_dims = gather_65_batch_dims_0, indices = select_65_to_uint16, validate_indices = gather_65_validate_indices_0, x = var_1068_shape_cast_fp16_to_uint16)[name = string("gather_65_cast_uint16")]; + string gather_65_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_65_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283195904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285293120))))[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_163_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285555328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287652544))))[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_43_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_163_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287914752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290011968))))[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_163_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, -1, 32, 64])]; + tensor var_1077_cast_fp16 = reshape(shape = concat_90x, x = linear_42_cast_fp16)[name = string("op_1077_cast_fp16")]; + tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_91x = const()[name = string("concat_91x"), val = tensor([1, -1, 32, 64])]; + tensor var_1080_cast_fp16 = reshape(shape = concat_91x, x = linear_43_cast_fp16)[name = string("op_1080_cast_fp16")]; + tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, -1, 32, 64])]; + tensor var_1083_cast_fp16 = reshape(shape = concat_92x, x = linear_44_cast_fp16)[name = string("op_1083_cast_fp16")]; + tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1077_cast_fp16)[name = string("transpose_71")]; + tensor var_1087_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1087_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1098_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1098_cast_fp16")]; + bool var_1100_interleave_0 = const()[name = string("op_1100_interleave_0"), val = bool(false)]; + tensor var_1100_cast_fp16 = concat(axis = var_69, interleave = var_1100_interleave_0, values = (var_1098_cast_fp16, x1_25_cast_fp16))[name = string("op_1100_cast_fp16")]; + tensor var_1101_cast_fp16 = mul(x = var_1100_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1101_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_1087_cast_fp16, y = var_1101_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1080_cast_fp16)[name = string("transpose_70")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1103_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1114_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1114_cast_fp16")]; + bool var_1116_interleave_0 = const()[name = string("op_1116_interleave_0"), val = bool(false)]; + tensor var_1116_cast_fp16 = concat(axis = var_69, interleave = var_1116_interleave_0, values = (var_1114_cast_fp16, x1_27_cast_fp16))[name = string("op_1116_cast_fp16")]; + tensor var_1117_cast_fp16 = mul(x = var_1116_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor k_state_13_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1117_cast_fp16)[name = string("k_state_13_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor concat_95_values0_0 = const()[name = string("concat_95_values0_0"), val = tensor([6])]; + int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; + bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; + tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (concat_95_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_95")]; + tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_95, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; + tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1083_cast_fp16)[name = string("transpose_69")]; + tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_95, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; + tensor var_1140_begin_0 = const()[name = string("op_1140_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1140_end_0 = const()[name = string("op_1140_end_0"), val = tensor([7, 1, 32, 2048, 64])]; + tensor var_1140_end_mask_0 = const()[name = string("op_1140_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1140_squeeze_mask_0 = const()[name = string("op_1140_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1140_cast_fp16 = slice_by_index(begin = var_1140_begin_0, end = var_1140_end_0, end_mask = var_1140_end_mask_0, squeeze_mask = var_1140_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_1140_cast_fp16")]; + tensor var_1143_begin_0 = const()[name = string("op_1143_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1143_end_mask_0 = const()[name = string("op_1143_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1143_cast_fp16 = slice_by_index(begin = var_1143_begin_0, end = concat_11, end_mask = var_1143_end_mask_0, x = var_1140_cast_fp16)[name = string("op_1143_cast_fp16")]; + tensor var_1145_begin_0 = const()[name = string("op_1145_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1145_end_0 = const()[name = string("op_1145_end_0"), val = tensor([7, 1, 32, 2048, 64])]; + tensor var_1145_end_mask_0 = const()[name = string("op_1145_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1145_squeeze_mask_0 = const()[name = string("op_1145_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1145_cast_fp16 = slice_by_index(begin = var_1145_begin_0, end = var_1145_end_0, end_mask = var_1145_end_mask_0, squeeze_mask = var_1145_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_1145_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = string("op_1148_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1148_end_mask_0 = const()[name = string("op_1148_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = concat_11, end_mask = var_1148_end_mask_0, x = var_1145_cast_fp16)[name = string("op_1148_cast_fp16")]; + tensor var_1150_shape_cast_fp16 = shape(x = var_1143_cast_fp16)[name = string("op_1150_shape_cast_fp16")]; + int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; + int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; + bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; + string var_1150_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1150_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; + tensor var_1150_shape_cast_fp16_to_uint16 = cast(dtype = var_1150_shape_cast_fp16_to_uint16_dtype_0, x = var_1150_shape_cast_fp16)[name = string("cast_70")]; + uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_1150_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; + string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_103_values0_0 = const()[name = string("concat_103_values0_0"), val = int32(1)]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(1)]; + int32 concat_103_values2_0 = const()[name = string("concat_103_values2_0"), val = int32(0)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_69")]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (concat_103_values0_0, concat_103_values1_0, concat_103_values2_0, gather_73_cast_uint16_to_int32))[name = string("concat_103")]; + tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_15_cast_fp16 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_103, end_mask = causal_mask_15_end_mask_0, x = causal_mask)[name = string("causal_mask_15_cast_fp16")]; + tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = var_1143_cast_fp16, query = query_states_27_cast_fp16, value = var_1148_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1156_perm_0 = const()[name = string("op_1156_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + int32 gather_65_cast_uint16_to_int32 = cast(dtype = gather_65_cast_uint16_to_int32_dtype_0, x = gather_65_cast_uint16)[name = string("cast_68")]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (gather_64, gather_65_cast_uint16_to_int32, var_69))[name = string("concat_104")]; + tensor var_1156_cast_fp16 = transpose(perm = var_1156_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_68")]; + tensor input_49_cast_fp16 = reshape(shape = concat_104, x = var_1156_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290274176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292371392))))[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor hidden_states_171_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; + fp16 var_64_promoted_13_to_fp16 = const()[name = string("op_64_promoted_13_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1165_cast_fp16 = pow(x = hidden_states_171_cast_fp16, y = var_64_promoted_13_to_fp16)[name = string("op_1165_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1165_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1169_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1168_to_fp16)[name = string("op_1169_cast_fp16")]; + fp32 var_1170_epsilon_0 = const()[name = string("op_1170_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1170_cast_fp16 = rsqrt(epsilon = var_1170_epsilon_0, x = var_1169_cast_fp16)[name = string("op_1170_cast_fp16")]; + tensor hidden_states_175_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = var_1170_cast_fp16)[name = string("hidden_states_175_cast_fp16")]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292633600)))]; + tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_175_cast_fp16)[name = string("input_51_cast_fp16")]; + tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292637760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301026432))))[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; + tensor var_1182_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1182_cast_fp16")]; + tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302075072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310463744))))[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor input_55_cast_fp16 = mul(x = var_1182_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; + tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311512384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319901056))))[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + fp16 var_64_promoted_14_to_fp16 = const()[name = string("op_64_promoted_14_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1195_cast_fp16 = pow(x = hidden_states_181_cast_fp16, y = var_64_promoted_14_to_fp16)[name = string("op_1195_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1195_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1198_to_fp16 = const()[name = string("op_1198_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1199_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1198_to_fp16)[name = string("op_1199_cast_fp16")]; + fp32 var_1200_epsilon_0 = const()[name = string("op_1200_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1200_cast_fp16 = rsqrt(epsilon = var_1200_epsilon_0, x = var_1199_cast_fp16)[name = string("op_1200_cast_fp16")]; + tensor hidden_states_185_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = var_1200_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320949696)))]; + tensor hidden_states_189_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_185_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; + tensor var_1211_shape_cast_fp16 = shape(x = hidden_states_189_cast_fp16)[name = string("op_1211_shape_cast_fp16")]; + int32 gather_74 = const()[name = string("gather_74"), val = int32(1)]; + int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; + int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; + bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; + string var_1211_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1211_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(1)]; + tensor var_1211_shape_cast_fp16_to_uint16 = cast(dtype = var_1211_shape_cast_fp16_to_uint16_dtype_0, x = var_1211_shape_cast_fp16)[name = string("cast_67")]; + uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_1211_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; + string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320953856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323051072))))[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_189_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323313280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325410496))))[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_189_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325672704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327769920))))[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_51_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_189_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 32, 64])]; + tensor var_1220_cast_fp16 = reshape(shape = concat_105x, x = linear_49_cast_fp16)[name = string("op_1220_cast_fp16")]; + tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_106x = const()[name = string("concat_106x"), val = tensor([1, -1, 32, 64])]; + tensor var_1223_cast_fp16 = reshape(shape = concat_106x, x = linear_50_cast_fp16)[name = string("op_1223_cast_fp16")]; + tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_107x = const()[name = string("concat_107x"), val = tensor([1, -1, 32, 64])]; + tensor var_1226_cast_fp16 = reshape(shape = concat_107x, x = linear_51_cast_fp16)[name = string("op_1226_cast_fp16")]; + tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1220_cast_fp16)[name = string("transpose_67")]; + tensor var_1230_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1230_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1241_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1241_cast_fp16")]; + bool var_1243_interleave_0 = const()[name = string("op_1243_interleave_0"), val = bool(false)]; + tensor var_1243_cast_fp16 = concat(axis = var_69, interleave = var_1243_interleave_0, values = (var_1241_cast_fp16, x1_29_cast_fp16))[name = string("op_1243_cast_fp16")]; + tensor var_1244_cast_fp16 = mul(x = var_1243_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1244_cast_fp16")]; + tensor query_states_31_cast_fp16 = add(x = var_1230_cast_fp16, y = var_1244_cast_fp16)[name = string("query_states_31_cast_fp16")]; + tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1223_cast_fp16)[name = string("transpose_66")]; + tensor var_1246_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1246_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1257_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1257_cast_fp16")]; + bool var_1259_interleave_0 = const()[name = string("op_1259_interleave_0"), val = bool(false)]; + tensor var_1259_cast_fp16 = concat(axis = var_69, interleave = var_1259_interleave_0, values = (var_1257_cast_fp16, x1_31_cast_fp16))[name = string("op_1259_cast_fp16")]; + tensor var_1260_cast_fp16 = mul(x = var_1259_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1260_cast_fp16")]; + tensor k_state_15_cast_fp16 = add(x = var_1246_cast_fp16, y = var_1260_cast_fp16)[name = string("k_state_15_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor concat_110_values0_0 = const()[name = string("concat_110_values0_0"), val = tensor([7])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (concat_110_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_110")]; + tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_110, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; + tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1226_cast_fp16)[name = string("transpose_65")]; + tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_110, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; + tensor var_1283_begin_0 = const()[name = string("op_1283_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1283_end_0 = const()[name = string("op_1283_end_0"), val = tensor([8, 1, 32, 2048, 64])]; + tensor var_1283_end_mask_0 = const()[name = string("op_1283_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1283_squeeze_mask_0 = const()[name = string("op_1283_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1283_cast_fp16 = slice_by_index(begin = var_1283_begin_0, end = var_1283_end_0, end_mask = var_1283_end_mask_0, squeeze_mask = var_1283_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_1283_cast_fp16")]; + tensor var_1286_begin_0 = const()[name = string("op_1286_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1286_end_mask_0 = const()[name = string("op_1286_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1286_cast_fp16 = slice_by_index(begin = var_1286_begin_0, end = concat_11, end_mask = var_1286_end_mask_0, x = var_1283_cast_fp16)[name = string("op_1286_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([8, 1, 32, 2048, 64])]; + tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1288_squeeze_mask_0 = const()[name = string("op_1288_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, squeeze_mask = var_1288_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_1288_cast_fp16")]; + tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_11, end_mask = var_1291_end_mask_0, x = var_1288_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor var_1293_shape_cast_fp16 = shape(x = var_1286_cast_fp16)[name = string("op_1293_shape_cast_fp16")]; + int32 gather_83_axis_0 = const()[name = string("gather_83_axis_0"), val = int32(0)]; + int32 gather_83_batch_dims_0 = const()[name = string("gather_83_batch_dims_0"), val = int32(0)]; + bool gather_83_validate_indices_0 = const()[name = string("gather_83_validate_indices_0"), val = bool(false)]; + string var_1293_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1293_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_83_to_uint16 = const()[name = string("select_83_to_uint16"), val = uint16(2)]; + tensor var_1293_shape_cast_fp16_to_uint16 = cast(dtype = var_1293_shape_cast_fp16_to_uint16_dtype_0, x = var_1293_shape_cast_fp16)[name = string("cast_66")]; + uint16 gather_83_cast_uint16 = gather(axis = gather_83_axis_0, batch_dims = gather_83_batch_dims_0, indices = select_83_to_uint16, validate_indices = gather_83_validate_indices_0, x = var_1293_shape_cast_fp16_to_uint16)[name = string("gather_83_cast_uint16")]; + string gather_83_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_83_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_118_values0_0 = const()[name = string("concat_118_values0_0"), val = int32(1)]; + int32 concat_118_values1_0 = const()[name = string("concat_118_values1_0"), val = int32(1)]; + int32 concat_118_values2_0 = const()[name = string("concat_118_values2_0"), val = int32(0)]; + int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; + bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; + int32 gather_83_cast_uint16_to_int32 = cast(dtype = gather_83_cast_uint16_to_int32_dtype_0, x = gather_83_cast_uint16)[name = string("cast_65")]; + tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (concat_118_values0_0, concat_118_values1_0, concat_118_values2_0, gather_83_cast_uint16_to_int32))[name = string("concat_118")]; + tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_17_cast_fp16 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_118, end_mask = causal_mask_17_end_mask_0, x = causal_mask)[name = string("causal_mask_17_cast_fp16")]; + tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = var_1286_cast_fp16, query = query_states_31_cast_fp16, value = var_1291_cast_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor var_1299_perm_0 = const()[name = string("op_1299_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; + bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; + int32 gather_75_cast_uint16_to_int32 = cast(dtype = gather_75_cast_uint16_to_int32_dtype_0, x = gather_75_cast_uint16)[name = string("cast_64")]; + tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (gather_74, gather_75_cast_uint16_to_int32, var_69))[name = string("concat_119")]; + tensor var_1299_cast_fp16 = transpose(perm = var_1299_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_64")]; + tensor input_57_cast_fp16 = reshape(shape = concat_119, x = var_1299_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328032128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330129344))))[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor hidden_states_197_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + fp16 var_64_promoted_15_to_fp16 = const()[name = string("op_64_promoted_15_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1308_cast_fp16 = pow(x = hidden_states_197_cast_fp16, y = var_64_promoted_15_to_fp16)[name = string("op_1308_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1308_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1311_to_fp16 = const()[name = string("op_1311_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1312_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1311_to_fp16)[name = string("op_1312_cast_fp16")]; + fp32 var_1313_epsilon_0 = const()[name = string("op_1313_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1313_cast_fp16 = rsqrt(epsilon = var_1313_epsilon_0, x = var_1312_cast_fp16)[name = string("op_1313_cast_fp16")]; + tensor hidden_states_201_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = var_1313_cast_fp16)[name = string("hidden_states_201_cast_fp16")]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330391552)))]; + tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_201_cast_fp16)[name = string("input_59_cast_fp16")]; + tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330395712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338784384))))[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor var_1325_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1325_cast_fp16")]; + tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339833024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348221696))))[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor input_63_cast_fp16 = mul(x = var_1325_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349270336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357659008))))[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor hidden_states_207_cast_fp16 = add(x = hidden_states_197_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_207_cast_fp16")]; + fp16 var_64_promoted_16_to_fp16 = const()[name = string("op_64_promoted_16_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1338_cast_fp16 = pow(x = hidden_states_207_cast_fp16, y = var_64_promoted_16_to_fp16)[name = string("op_1338_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1338_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1341_to_fp16 = const()[name = string("op_1341_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1342_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1341_to_fp16)[name = string("op_1342_cast_fp16")]; + fp32 var_1343_epsilon_0 = const()[name = string("op_1343_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1343_cast_fp16 = rsqrt(epsilon = var_1343_epsilon_0, x = var_1342_cast_fp16)[name = string("op_1343_cast_fp16")]; + tensor hidden_states_211_cast_fp16 = mul(x = hidden_states_207_cast_fp16, y = var_1343_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358707648)))]; + tensor hidden_states_215_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_211_cast_fp16)[name = string("hidden_states_215_cast_fp16")]; + tensor var_1354_shape_cast_fp16 = shape(x = hidden_states_215_cast_fp16)[name = string("op_1354_shape_cast_fp16")]; + int32 gather_84 = const()[name = string("gather_84"), val = int32(1)]; + int32 gather_85_axis_0 = const()[name = string("gather_85_axis_0"), val = int32(0)]; + int32 gather_85_batch_dims_0 = const()[name = string("gather_85_batch_dims_0"), val = int32(0)]; + bool gather_85_validate_indices_0 = const()[name = string("gather_85_validate_indices_0"), val = bool(false)]; + string var_1354_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1354_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_85_to_uint16 = const()[name = string("select_85_to_uint16"), val = uint16(1)]; + tensor var_1354_shape_cast_fp16_to_uint16 = cast(dtype = var_1354_shape_cast_fp16_to_uint16_dtype_0, x = var_1354_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_85_cast_uint16 = gather(axis = gather_85_axis_0, batch_dims = gather_85_batch_dims_0, indices = select_85_to_uint16, validate_indices = gather_85_validate_indices_0, x = var_1354_shape_cast_fp16_to_uint16)[name = string("gather_85_cast_uint16")]; + string gather_85_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_85_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358711808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360809024))))[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_215_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361071232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363168448))))[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_57_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_215_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363430656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365527872))))[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_215_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor concat_120x = const()[name = string("concat_120x"), val = tensor([1, -1, 32, 64])]; + tensor var_1363_cast_fp16 = reshape(shape = concat_120x, x = linear_56_cast_fp16)[name = string("op_1363_cast_fp16")]; + tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_121x = const()[name = string("concat_121x"), val = tensor([1, -1, 32, 64])]; + tensor var_1366_cast_fp16 = reshape(shape = concat_121x, x = linear_57_cast_fp16)[name = string("op_1366_cast_fp16")]; + tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 32, 64])]; + tensor var_1369_cast_fp16 = reshape(shape = concat_122x, x = linear_58_cast_fp16)[name = string("op_1369_cast_fp16")]; + tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1363_cast_fp16)[name = string("transpose_63")]; + tensor var_1373_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1373_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1384_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1384_cast_fp16")]; + bool var_1386_interleave_0 = const()[name = string("op_1386_interleave_0"), val = bool(false)]; + tensor var_1386_cast_fp16 = concat(axis = var_69, interleave = var_1386_interleave_0, values = (var_1384_cast_fp16, x1_33_cast_fp16))[name = string("op_1386_cast_fp16")]; + tensor var_1387_cast_fp16 = mul(x = var_1386_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1387_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_1373_cast_fp16, y = var_1387_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1366_cast_fp16)[name = string("transpose_62")]; + tensor var_1389_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1389_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1400_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1400_cast_fp16")]; + bool var_1402_interleave_0 = const()[name = string("op_1402_interleave_0"), val = bool(false)]; + tensor var_1402_cast_fp16 = concat(axis = var_69, interleave = var_1402_interleave_0, values = (var_1400_cast_fp16, x1_35_cast_fp16))[name = string("op_1402_cast_fp16")]; + tensor var_1403_cast_fp16 = mul(x = var_1402_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1403_cast_fp16")]; + tensor k_state_17_cast_fp16 = add(x = var_1389_cast_fp16, y = var_1403_cast_fp16)[name = string("k_state_17_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor concat_125_values0_0 = const()[name = string("concat_125_values0_0"), val = tensor([8])]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (concat_125_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_125")]; + tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_125, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; + tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1369_cast_fp16)[name = string("transpose_61")]; + tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_125, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; + tensor var_1426_begin_0 = const()[name = string("op_1426_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1426_end_0 = const()[name = string("op_1426_end_0"), val = tensor([9, 1, 32, 2048, 64])]; + tensor var_1426_end_mask_0 = const()[name = string("op_1426_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1426_squeeze_mask_0 = const()[name = string("op_1426_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, squeeze_mask = var_1426_squeeze_mask_0, x = coreml_update_state_64)[name = string("op_1426_cast_fp16")]; + tensor var_1429_begin_0 = const()[name = string("op_1429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1429_end_mask_0 = const()[name = string("op_1429_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1429_cast_fp16 = slice_by_index(begin = var_1429_begin_0, end = concat_11, end_mask = var_1429_end_mask_0, x = var_1426_cast_fp16)[name = string("op_1429_cast_fp16")]; + tensor var_1431_begin_0 = const()[name = string("op_1431_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1431_end_0 = const()[name = string("op_1431_end_0"), val = tensor([9, 1, 32, 2048, 64])]; + tensor var_1431_end_mask_0 = const()[name = string("op_1431_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1431_squeeze_mask_0 = const()[name = string("op_1431_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1431_cast_fp16 = slice_by_index(begin = var_1431_begin_0, end = var_1431_end_0, end_mask = var_1431_end_mask_0, squeeze_mask = var_1431_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_1431_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = string("op_1434_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1434_end_mask_0 = const()[name = string("op_1434_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = concat_11, end_mask = var_1434_end_mask_0, x = var_1431_cast_fp16)[name = string("op_1434_cast_fp16")]; + tensor var_1436_shape_cast_fp16 = shape(x = var_1429_cast_fp16)[name = string("op_1436_shape_cast_fp16")]; + int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; + int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; + bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; + string var_1436_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1436_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; + tensor var_1436_shape_cast_fp16_to_uint16 = cast(dtype = var_1436_shape_cast_fp16_to_uint16_dtype_0, x = var_1436_shape_cast_fp16)[name = string("cast_62")]; + uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_1436_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; + string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_133_values0_0 = const()[name = string("concat_133_values0_0"), val = int32(1)]; + int32 concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = int32(1)]; + int32 concat_133_values2_0 = const()[name = string("concat_133_values2_0"), val = int32(0)]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + int32 gather_93_cast_uint16_to_int32 = cast(dtype = gather_93_cast_uint16_to_int32_dtype_0, x = gather_93_cast_uint16)[name = string("cast_61")]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (concat_133_values0_0, concat_133_values1_0, concat_133_values2_0, gather_93_cast_uint16_to_int32))[name = string("concat_133")]; + tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_19_cast_fp16 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_133, end_mask = causal_mask_19_end_mask_0, x = causal_mask)[name = string("causal_mask_19_cast_fp16")]; + tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = var_1429_cast_fp16, query = query_states_35_cast_fp16, value = var_1434_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_1442_perm_0 = const()[name = string("op_1442_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; + bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; + int32 gather_85_cast_uint16_to_int32 = cast(dtype = gather_85_cast_uint16_to_int32_dtype_0, x = gather_85_cast_uint16)[name = string("cast_60")]; + tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (gather_84, gather_85_cast_uint16_to_int32, var_69))[name = string("concat_134")]; + tensor var_1442_cast_fp16 = transpose(perm = var_1442_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_60")]; + tensor input_65_cast_fp16 = reshape(shape = concat_134, x = var_1442_cast_fp16)[name = string("input_65_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365790080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367887296))))[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor hidden_states_223_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; + fp16 var_64_promoted_17_to_fp16 = const()[name = string("op_64_promoted_17_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1451_cast_fp16 = pow(x = hidden_states_223_cast_fp16, y = var_64_promoted_17_to_fp16)[name = string("op_1451_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1451_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1455_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1454_to_fp16)[name = string("op_1455_cast_fp16")]; + fp32 var_1456_epsilon_0 = const()[name = string("op_1456_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1456_cast_fp16 = rsqrt(epsilon = var_1456_epsilon_0, x = var_1455_cast_fp16)[name = string("op_1456_cast_fp16")]; + tensor hidden_states_227_cast_fp16 = mul(x = hidden_states_223_cast_fp16, y = var_1456_cast_fp16)[name = string("hidden_states_227_cast_fp16")]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368149504)))]; + tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_227_cast_fp16)[name = string("input_67_cast_fp16")]; + tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368153664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376542336))))[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1468_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1468_cast_fp16")]; + tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377590976))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385979648))))[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_1468_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387028288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395416960))))[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor hidden_states_233_cast_fp16 = add(x = hidden_states_223_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; + fp16 var_64_promoted_18_to_fp16 = const()[name = string("op_64_promoted_18_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1481_cast_fp16 = pow(x = hidden_states_233_cast_fp16, y = var_64_promoted_18_to_fp16)[name = string("op_1481_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1481_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1485_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1484_to_fp16)[name = string("op_1485_cast_fp16")]; + fp32 var_1486_epsilon_0 = const()[name = string("op_1486_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1486_cast_fp16 = rsqrt(epsilon = var_1486_epsilon_0, x = var_1485_cast_fp16)[name = string("op_1486_cast_fp16")]; + tensor hidden_states_237_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = var_1486_cast_fp16)[name = string("hidden_states_237_cast_fp16")]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396465600)))]; + tensor hidden_states_241_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_237_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; + tensor var_1497_shape_cast_fp16 = shape(x = hidden_states_241_cast_fp16)[name = string("op_1497_shape_cast_fp16")]; + int32 gather_94 = const()[name = string("gather_94"), val = int32(1)]; + int32 gather_95_axis_0 = const()[name = string("gather_95_axis_0"), val = int32(0)]; + int32 gather_95_batch_dims_0 = const()[name = string("gather_95_batch_dims_0"), val = int32(0)]; + bool gather_95_validate_indices_0 = const()[name = string("gather_95_validate_indices_0"), val = bool(false)]; + string var_1497_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1497_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_95_to_uint16 = const()[name = string("select_95_to_uint16"), val = uint16(1)]; + tensor var_1497_shape_cast_fp16_to_uint16 = cast(dtype = var_1497_shape_cast_fp16_to_uint16_dtype_0, x = var_1497_shape_cast_fp16)[name = string("cast_59")]; + uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1497_shape_cast_fp16_to_uint16)[name = string("gather_95_cast_uint16")]; + string gather_95_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_95_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396469760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398566976))))[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_241_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398829184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400926400))))[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_64_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_241_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401188608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403285824))))[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_65_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_241_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 32, 64])]; + tensor var_1506_cast_fp16 = reshape(shape = concat_135x, x = linear_63_cast_fp16)[name = string("op_1506_cast_fp16")]; + tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_136x = const()[name = string("concat_136x"), val = tensor([1, -1, 32, 64])]; + tensor var_1509_cast_fp16 = reshape(shape = concat_136x, x = linear_64_cast_fp16)[name = string("op_1509_cast_fp16")]; + tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_137x = const()[name = string("concat_137x"), val = tensor([1, -1, 32, 64])]; + tensor var_1512_cast_fp16 = reshape(shape = concat_137x, x = linear_65_cast_fp16)[name = string("op_1512_cast_fp16")]; + tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1506_cast_fp16)[name = string("transpose_59")]; + tensor var_1516_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1516_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1527_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1527_cast_fp16")]; + bool var_1529_interleave_0 = const()[name = string("op_1529_interleave_0"), val = bool(false)]; + tensor var_1529_cast_fp16 = concat(axis = var_69, interleave = var_1529_interleave_0, values = (var_1527_cast_fp16, x1_37_cast_fp16))[name = string("op_1529_cast_fp16")]; + tensor var_1530_cast_fp16 = mul(x = var_1529_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1530_cast_fp16")]; + tensor query_states_39_cast_fp16 = add(x = var_1516_cast_fp16, y = var_1530_cast_fp16)[name = string("query_states_39_cast_fp16")]; + tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1509_cast_fp16)[name = string("transpose_58")]; + tensor var_1532_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1532_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1543_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1543_cast_fp16")]; + bool var_1545_interleave_0 = const()[name = string("op_1545_interleave_0"), val = bool(false)]; + tensor var_1545_cast_fp16 = concat(axis = var_69, interleave = var_1545_interleave_0, values = (var_1543_cast_fp16, x1_39_cast_fp16))[name = string("op_1545_cast_fp16")]; + tensor var_1546_cast_fp16 = mul(x = var_1545_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1546_cast_fp16")]; + tensor k_state_19_cast_fp16 = add(x = var_1532_cast_fp16, y = var_1546_cast_fp16)[name = string("k_state_19_cast_fp16")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor concat_140_values0_0 = const()[name = string("concat_140_values0_0"), val = tensor([9])]; + int32 concat_140_axis_0 = const()[name = string("concat_140_axis_0"), val = int32(0)]; + bool concat_140_interleave_0 = const()[name = string("concat_140_interleave_0"), val = bool(false)]; + tensor concat_140 = concat(axis = concat_140_axis_0, interleave = concat_140_interleave_0, values = (concat_140_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_140")]; + tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_140, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; + tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_1512_cast_fp16)[name = string("transpose_57")]; + tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_140, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; + tensor var_1569_begin_0 = const()[name = string("op_1569_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1569_end_0 = const()[name = string("op_1569_end_0"), val = tensor([10, 1, 32, 2048, 64])]; + tensor var_1569_end_mask_0 = const()[name = string("op_1569_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1569_squeeze_mask_0 = const()[name = string("op_1569_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1569_cast_fp16 = slice_by_index(begin = var_1569_begin_0, end = var_1569_end_0, end_mask = var_1569_end_mask_0, squeeze_mask = var_1569_squeeze_mask_0, x = coreml_update_state_66)[name = string("op_1569_cast_fp16")]; + tensor var_1572_begin_0 = const()[name = string("op_1572_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1572_end_mask_0 = const()[name = string("op_1572_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1572_cast_fp16 = slice_by_index(begin = var_1572_begin_0, end = concat_11, end_mask = var_1572_end_mask_0, x = var_1569_cast_fp16)[name = string("op_1572_cast_fp16")]; + tensor var_1574_begin_0 = const()[name = string("op_1574_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1574_end_0 = const()[name = string("op_1574_end_0"), val = tensor([10, 1, 32, 2048, 64])]; + tensor var_1574_end_mask_0 = const()[name = string("op_1574_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1574_squeeze_mask_0 = const()[name = string("op_1574_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1574_cast_fp16 = slice_by_index(begin = var_1574_begin_0, end = var_1574_end_0, end_mask = var_1574_end_mask_0, squeeze_mask = var_1574_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_1574_cast_fp16")]; + tensor var_1577_begin_0 = const()[name = string("op_1577_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1577_end_mask_0 = const()[name = string("op_1577_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1577_cast_fp16 = slice_by_index(begin = var_1577_begin_0, end = concat_11, end_mask = var_1577_end_mask_0, x = var_1574_cast_fp16)[name = string("op_1577_cast_fp16")]; + tensor var_1579_shape_cast_fp16 = shape(x = var_1572_cast_fp16)[name = string("op_1579_shape_cast_fp16")]; + int32 gather_103_axis_0 = const()[name = string("gather_103_axis_0"), val = int32(0)]; + int32 gather_103_batch_dims_0 = const()[name = string("gather_103_batch_dims_0"), val = int32(0)]; + bool gather_103_validate_indices_0 = const()[name = string("gather_103_validate_indices_0"), val = bool(false)]; + string var_1579_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1579_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_103_to_uint16 = const()[name = string("select_103_to_uint16"), val = uint16(2)]; + tensor var_1579_shape_cast_fp16_to_uint16 = cast(dtype = var_1579_shape_cast_fp16_to_uint16_dtype_0, x = var_1579_shape_cast_fp16)[name = string("cast_58")]; + uint16 gather_103_cast_uint16 = gather(axis = gather_103_axis_0, batch_dims = gather_103_batch_dims_0, indices = select_103_to_uint16, validate_indices = gather_103_validate_indices_0, x = var_1579_shape_cast_fp16_to_uint16)[name = string("gather_103_cast_uint16")]; + string gather_103_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_103_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(1)]; + int32 concat_148_values1_0 = const()[name = string("concat_148_values1_0"), val = int32(1)]; + int32 concat_148_values2_0 = const()[name = string("concat_148_values2_0"), val = int32(0)]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + int32 gather_103_cast_uint16_to_int32 = cast(dtype = gather_103_cast_uint16_to_int32_dtype_0, x = gather_103_cast_uint16)[name = string("cast_57")]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, concat_148_values1_0, concat_148_values2_0, gather_103_cast_uint16_to_int32))[name = string("concat_148")]; + tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_21_cast_fp16 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_148, end_mask = causal_mask_21_end_mask_0, x = causal_mask)[name = string("causal_mask_21_cast_fp16")]; + tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = var_1572_cast_fp16, query = query_states_39_cast_fp16, value = var_1577_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1585_perm_0 = const()[name = string("op_1585_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_149_axis_0 = const()[name = string("concat_149_axis_0"), val = int32(0)]; + bool concat_149_interleave_0 = const()[name = string("concat_149_interleave_0"), val = bool(false)]; + int32 gather_95_cast_uint16_to_int32 = cast(dtype = gather_95_cast_uint16_to_int32_dtype_0, x = gather_95_cast_uint16)[name = string("cast_56")]; + tensor concat_149 = concat(axis = concat_149_axis_0, interleave = concat_149_interleave_0, values = (gather_94, gather_95_cast_uint16_to_int32, var_69))[name = string("concat_149")]; + tensor var_1585_cast_fp16 = transpose(perm = var_1585_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_56")]; + tensor input_73_cast_fp16 = reshape(shape = concat_149, x = var_1585_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403548032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405645248))))[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor hidden_states_249_cast_fp16 = add(x = hidden_states_233_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; + fp16 var_64_promoted_19_to_fp16 = const()[name = string("op_64_promoted_19_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1594_cast_fp16 = pow(x = hidden_states_249_cast_fp16, y = var_64_promoted_19_to_fp16)[name = string("op_1594_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1594_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1598_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1597_to_fp16)[name = string("op_1598_cast_fp16")]; + fp32 var_1599_epsilon_0 = const()[name = string("op_1599_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1599_cast_fp16 = rsqrt(epsilon = var_1599_epsilon_0, x = var_1598_cast_fp16)[name = string("op_1599_cast_fp16")]; + tensor hidden_states_253_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = var_1599_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405907456)))]; + tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_253_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405911616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414300288))))[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1611_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1611_cast_fp16")]; + tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415348928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423737600))))[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor input_79_cast_fp16 = mul(x = var_1611_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; + tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424786240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433174912))))[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_69_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_249_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + fp16 var_64_promoted_20_to_fp16 = const()[name = string("op_64_promoted_20_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1624_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_64_promoted_20_to_fp16)[name = string("op_1624_cast_fp16")]; + tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; + bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; + tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1624_cast_fp16)[name = string("variance_41_cast_fp16")]; + fp16 var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1628_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1627_to_fp16)[name = string("op_1628_cast_fp16")]; + fp32 var_1629_epsilon_0 = const()[name = string("op_1629_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1629_cast_fp16 = rsqrt(epsilon = var_1629_epsilon_0, x = var_1628_cast_fp16)[name = string("op_1629_cast_fp16")]; + tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_1629_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434223552)))]; + tensor hidden_states_267_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_263_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; + tensor var_1640_shape_cast_fp16 = shape(x = hidden_states_267_cast_fp16)[name = string("op_1640_shape_cast_fp16")]; + int32 gather_104 = const()[name = string("gather_104"), val = int32(1)]; + int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; + int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; + bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; + string var_1640_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1640_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(1)]; + tensor var_1640_shape_cast_fp16_to_uint16 = cast(dtype = var_1640_shape_cast_fp16_to_uint16_dtype_0, x = var_1640_shape_cast_fp16)[name = string("cast_55")]; + uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1640_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; + string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434227712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436324928))))[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_267_cast_fp16)[name = string("linear_70_cast_fp16")]; + tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436587136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438684352))))[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_71_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_267_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438946560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441043776))))[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_72_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_267_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor concat_150x = const()[name = string("concat_150x"), val = tensor([1, -1, 32, 64])]; + tensor var_1649_cast_fp16 = reshape(shape = concat_150x, x = linear_70_cast_fp16)[name = string("op_1649_cast_fp16")]; + tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_151x = const()[name = string("concat_151x"), val = tensor([1, -1, 32, 64])]; + tensor var_1652_cast_fp16 = reshape(shape = concat_151x, x = linear_71_cast_fp16)[name = string("op_1652_cast_fp16")]; + tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 32, 64])]; + tensor var_1655_cast_fp16 = reshape(shape = concat_152x, x = linear_72_cast_fp16)[name = string("op_1655_cast_fp16")]; + tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_1649_cast_fp16)[name = string("transpose_55")]; + tensor var_1659_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1659_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1670_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_1670_cast_fp16")]; + bool var_1672_interleave_0 = const()[name = string("op_1672_interleave_0"), val = bool(false)]; + tensor var_1672_cast_fp16 = concat(axis = var_69, interleave = var_1672_interleave_0, values = (var_1670_cast_fp16, x1_41_cast_fp16))[name = string("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = mul(x = var_1672_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1673_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_1659_cast_fp16, y = var_1673_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_1652_cast_fp16)[name = string("transpose_54")]; + tensor var_1675_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1686_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1686_cast_fp16")]; + bool var_1688_interleave_0 = const()[name = string("op_1688_interleave_0"), val = bool(false)]; + tensor var_1688_cast_fp16 = concat(axis = var_69, interleave = var_1688_interleave_0, values = (var_1686_cast_fp16, x1_43_cast_fp16))[name = string("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = mul(x = var_1688_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1689_cast_fp16")]; + tensor k_state_21_cast_fp16 = add(x = var_1675_cast_fp16, y = var_1689_cast_fp16)[name = string("k_state_21_cast_fp16")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor concat_155_values0_0 = const()[name = string("concat_155_values0_0"), val = tensor([10])]; + int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; + bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; + tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (concat_155_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_155")]; + tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_155, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; + tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_1655_cast_fp16)[name = string("transpose_53")]; + tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_155, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; + tensor var_1712_begin_0 = const()[name = string("op_1712_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_1712_end_0 = const()[name = string("op_1712_end_0"), val = tensor([11, 1, 32, 2048, 64])]; + tensor var_1712_end_mask_0 = const()[name = string("op_1712_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1712_squeeze_mask_0 = const()[name = string("op_1712_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1712_cast_fp16 = slice_by_index(begin = var_1712_begin_0, end = var_1712_end_0, end_mask = var_1712_end_mask_0, squeeze_mask = var_1712_squeeze_mask_0, x = coreml_update_state_68)[name = string("op_1712_cast_fp16")]; + tensor var_1715_begin_0 = const()[name = string("op_1715_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1715_end_mask_0 = const()[name = string("op_1715_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1715_cast_fp16 = slice_by_index(begin = var_1715_begin_0, end = concat_11, end_mask = var_1715_end_mask_0, x = var_1712_cast_fp16)[name = string("op_1715_cast_fp16")]; + tensor var_1717_begin_0 = const()[name = string("op_1717_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_1717_end_0 = const()[name = string("op_1717_end_0"), val = tensor([11, 1, 32, 2048, 64])]; + tensor var_1717_end_mask_0 = const()[name = string("op_1717_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1717_squeeze_mask_0 = const()[name = string("op_1717_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1717_cast_fp16 = slice_by_index(begin = var_1717_begin_0, end = var_1717_end_0, end_mask = var_1717_end_mask_0, squeeze_mask = var_1717_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_1717_cast_fp16")]; + tensor var_1720_begin_0 = const()[name = string("op_1720_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1720_end_mask_0 = const()[name = string("op_1720_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1720_cast_fp16 = slice_by_index(begin = var_1720_begin_0, end = concat_11, end_mask = var_1720_end_mask_0, x = var_1717_cast_fp16)[name = string("op_1720_cast_fp16")]; + tensor var_1722_shape_cast_fp16 = shape(x = var_1715_cast_fp16)[name = string("op_1722_shape_cast_fp16")]; + int32 gather_113_axis_0 = const()[name = string("gather_113_axis_0"), val = int32(0)]; + int32 gather_113_batch_dims_0 = const()[name = string("gather_113_batch_dims_0"), val = int32(0)]; + bool gather_113_validate_indices_0 = const()[name = string("gather_113_validate_indices_0"), val = bool(false)]; + string var_1722_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1722_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_113_to_uint16 = const()[name = string("select_113_to_uint16"), val = uint16(2)]; + tensor var_1722_shape_cast_fp16_to_uint16 = cast(dtype = var_1722_shape_cast_fp16_to_uint16_dtype_0, x = var_1722_shape_cast_fp16)[name = string("cast_54")]; + uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1722_shape_cast_fp16_to_uint16)[name = string("gather_113_cast_uint16")]; + string gather_113_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_113_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_163_values0_0 = const()[name = string("concat_163_values0_0"), val = int32(1)]; + int32 concat_163_values1_0 = const()[name = string("concat_163_values1_0"), val = int32(1)]; + int32 concat_163_values2_0 = const()[name = string("concat_163_values2_0"), val = int32(0)]; + int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; + bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; + int32 gather_113_cast_uint16_to_int32 = cast(dtype = gather_113_cast_uint16_to_int32_dtype_0, x = gather_113_cast_uint16)[name = string("cast_53")]; + tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (concat_163_values0_0, concat_163_values1_0, concat_163_values2_0, gather_113_cast_uint16_to_int32))[name = string("concat_163")]; + tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_23_cast_fp16 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_163, end_mask = causal_mask_23_end_mask_0, x = causal_mask)[name = string("causal_mask_23_cast_fp16")]; + tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = var_1715_cast_fp16, query = query_states_43_cast_fp16, value = var_1720_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_1728_perm_0 = const()[name = string("op_1728_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_52")]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (gather_104, gather_105_cast_uint16_to_int32, var_69))[name = string("concat_164")]; + tensor var_1728_cast_fp16 = transpose(perm = var_1728_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_52")]; + tensor input_81_cast_fp16 = reshape(shape = concat_164, x = var_1728_cast_fp16)[name = string("input_81_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441305984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443403200))))[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor hidden_states_275_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_275_cast_fp16")]; + fp16 var_64_promoted_21_to_fp16 = const()[name = string("op_64_promoted_21_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1737_cast_fp16 = pow(x = hidden_states_275_cast_fp16, y = var_64_promoted_21_to_fp16)[name = string("op_1737_cast_fp16")]; + tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; + bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; + tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1737_cast_fp16)[name = string("variance_43_cast_fp16")]; + fp16 var_1740_to_fp16 = const()[name = string("op_1740_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1741_cast_fp16 = add(x = variance_43_cast_fp16, y = var_1740_to_fp16)[name = string("op_1741_cast_fp16")]; + fp32 var_1742_epsilon_0 = const()[name = string("op_1742_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1742_cast_fp16 = rsqrt(epsilon = var_1742_epsilon_0, x = var_1741_cast_fp16)[name = string("op_1742_cast_fp16")]; + tensor hidden_states_279_cast_fp16 = mul(x = hidden_states_275_cast_fp16, y = var_1742_cast_fp16)[name = string("hidden_states_279_cast_fp16")]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443665408)))]; + tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_279_cast_fp16)[name = string("input_83_cast_fp16")]; + tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443669568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452058240))))[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_1754_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_1754_cast_fp16")]; + tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453106880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461495552))))[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = var_1754_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; + tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462544192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470932864))))[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_76_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor hidden_states_285_cast_fp16 = add(x = hidden_states_275_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; + fp16 var_64_promoted_22_to_fp16 = const()[name = string("op_64_promoted_22_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1767_cast_fp16 = pow(x = hidden_states_285_cast_fp16, y = var_64_promoted_22_to_fp16)[name = string("op_1767_cast_fp16")]; + tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; + bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; + tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1767_cast_fp16)[name = string("variance_45_cast_fp16")]; + fp16 var_1770_to_fp16 = const()[name = string("op_1770_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1771_cast_fp16 = add(x = variance_45_cast_fp16, y = var_1770_to_fp16)[name = string("op_1771_cast_fp16")]; + fp32 var_1772_epsilon_0 = const()[name = string("op_1772_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1772_cast_fp16 = rsqrt(epsilon = var_1772_epsilon_0, x = var_1771_cast_fp16)[name = string("op_1772_cast_fp16")]; + tensor hidden_states_289_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = var_1772_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471981504)))]; + tensor hidden_states_293_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_289_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + tensor var_1783_shape_cast_fp16 = shape(x = hidden_states_293_cast_fp16)[name = string("op_1783_shape_cast_fp16")]; + int32 gather_114 = const()[name = string("gather_114"), val = int32(1)]; + int32 gather_115_axis_0 = const()[name = string("gather_115_axis_0"), val = int32(0)]; + int32 gather_115_batch_dims_0 = const()[name = string("gather_115_batch_dims_0"), val = int32(0)]; + bool gather_115_validate_indices_0 = const()[name = string("gather_115_validate_indices_0"), val = bool(false)]; + string var_1783_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1783_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_115_to_uint16 = const()[name = string("select_115_to_uint16"), val = uint16(1)]; + tensor var_1783_shape_cast_fp16_to_uint16 = cast(dtype = var_1783_shape_cast_fp16_to_uint16_dtype_0, x = var_1783_shape_cast_fp16)[name = string("cast_51")]; + uint16 gather_115_cast_uint16 = gather(axis = gather_115_axis_0, batch_dims = gather_115_batch_dims_0, indices = select_115_to_uint16, validate_indices = gather_115_validate_indices_0, x = var_1783_shape_cast_fp16_to_uint16)[name = string("gather_115_cast_uint16")]; + string gather_115_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_115_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471985664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474082880))))[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_293_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474345088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476442304))))[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_78_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_293_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476704512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478801728))))[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_79_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_293_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor concat_165x = const()[name = string("concat_165x"), val = tensor([1, -1, 32, 64])]; + tensor var_1792_cast_fp16 = reshape(shape = concat_165x, x = linear_77_cast_fp16)[name = string("op_1792_cast_fp16")]; + tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 32, 64])]; + tensor var_1795_cast_fp16 = reshape(shape = concat_166x, x = linear_78_cast_fp16)[name = string("op_1795_cast_fp16")]; + tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 32, 64])]; + tensor var_1798_cast_fp16 = reshape(shape = concat_167x, x = linear_79_cast_fp16)[name = string("op_1798_cast_fp16")]; + tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_1792_cast_fp16)[name = string("transpose_51")]; + tensor var_1802_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1802_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1813_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_1813_cast_fp16")]; + bool var_1815_interleave_0 = const()[name = string("op_1815_interleave_0"), val = bool(false)]; + tensor var_1815_cast_fp16 = concat(axis = var_69, interleave = var_1815_interleave_0, values = (var_1813_cast_fp16, x1_45_cast_fp16))[name = string("op_1815_cast_fp16")]; + tensor var_1816_cast_fp16 = mul(x = var_1815_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1816_cast_fp16")]; + tensor query_states_47_cast_fp16 = add(x = var_1802_cast_fp16, y = var_1816_cast_fp16)[name = string("query_states_47_cast_fp16")]; + tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_1795_cast_fp16)[name = string("transpose_50")]; + tensor var_1818_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1818_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1829_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_1829_cast_fp16")]; + bool var_1831_interleave_0 = const()[name = string("op_1831_interleave_0"), val = bool(false)]; + tensor var_1831_cast_fp16 = concat(axis = var_69, interleave = var_1831_interleave_0, values = (var_1829_cast_fp16, x1_47_cast_fp16))[name = string("op_1831_cast_fp16")]; + tensor var_1832_cast_fp16 = mul(x = var_1831_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor k_state_23_cast_fp16 = add(x = var_1818_cast_fp16, y = var_1832_cast_fp16)[name = string("k_state_23_cast_fp16")]; + tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; + tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; + tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; + tensor concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = tensor([11])]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_170")]; + tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_170, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; + tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_1798_cast_fp16)[name = string("transpose_49")]; + tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_170, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; + tensor var_1855_begin_0 = const()[name = string("op_1855_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_1855_end_0 = const()[name = string("op_1855_end_0"), val = tensor([12, 1, 32, 2048, 64])]; + tensor var_1855_end_mask_0 = const()[name = string("op_1855_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1855_squeeze_mask_0 = const()[name = string("op_1855_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1855_cast_fp16 = slice_by_index(begin = var_1855_begin_0, end = var_1855_end_0, end_mask = var_1855_end_mask_0, squeeze_mask = var_1855_squeeze_mask_0, x = coreml_update_state_70)[name = string("op_1855_cast_fp16")]; + tensor var_1858_begin_0 = const()[name = string("op_1858_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1858_end_mask_0 = const()[name = string("op_1858_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1858_cast_fp16 = slice_by_index(begin = var_1858_begin_0, end = concat_11, end_mask = var_1858_end_mask_0, x = var_1855_cast_fp16)[name = string("op_1858_cast_fp16")]; + tensor var_1860_begin_0 = const()[name = string("op_1860_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_1860_end_0 = const()[name = string("op_1860_end_0"), val = tensor([12, 1, 32, 2048, 64])]; + tensor var_1860_end_mask_0 = const()[name = string("op_1860_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1860_squeeze_mask_0 = const()[name = string("op_1860_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1860_cast_fp16 = slice_by_index(begin = var_1860_begin_0, end = var_1860_end_0, end_mask = var_1860_end_mask_0, squeeze_mask = var_1860_squeeze_mask_0, x = coreml_update_state_71)[name = string("op_1860_cast_fp16")]; + tensor var_1863_begin_0 = const()[name = string("op_1863_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1863_end_mask_0 = const()[name = string("op_1863_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1863_cast_fp16 = slice_by_index(begin = var_1863_begin_0, end = concat_11, end_mask = var_1863_end_mask_0, x = var_1860_cast_fp16)[name = string("op_1863_cast_fp16")]; + tensor var_1865_shape_cast_fp16 = shape(x = var_1858_cast_fp16)[name = string("op_1865_shape_cast_fp16")]; + int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; + int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; + bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; + string var_1865_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1865_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; + tensor var_1865_shape_cast_fp16_to_uint16 = cast(dtype = var_1865_shape_cast_fp16_to_uint16_dtype_0, x = var_1865_shape_cast_fp16)[name = string("cast_50")]; + uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1865_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; + string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_178_values0_0 = const()[name = string("concat_178_values0_0"), val = int32(1)]; + int32 concat_178_values1_0 = const()[name = string("concat_178_values1_0"), val = int32(1)]; + int32 concat_178_values2_0 = const()[name = string("concat_178_values2_0"), val = int32(0)]; + int32 concat_178_axis_0 = const()[name = string("concat_178_axis_0"), val = int32(0)]; + bool concat_178_interleave_0 = const()[name = string("concat_178_interleave_0"), val = bool(false)]; + int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_49")]; + tensor concat_178 = concat(axis = concat_178_axis_0, interleave = concat_178_interleave_0, values = (concat_178_values0_0, concat_178_values1_0, concat_178_values2_0, gather_123_cast_uint16_to_int32))[name = string("concat_178")]; + tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_25_cast_fp16 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_178, end_mask = causal_mask_25_end_mask_0, x = causal_mask)[name = string("causal_mask_25_cast_fp16")]; + tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = var_1858_cast_fp16, query = query_states_47_cast_fp16, value = var_1863_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_1871_perm_0 = const()[name = string("op_1871_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; + bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; + int32 gather_115_cast_uint16_to_int32 = cast(dtype = gather_115_cast_uint16_to_int32_dtype_0, x = gather_115_cast_uint16)[name = string("cast_48")]; + tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (gather_114, gather_115_cast_uint16_to_int32, var_69))[name = string("concat_179")]; + tensor var_1871_cast_fp16 = transpose(perm = var_1871_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; + tensor input_89_cast_fp16 = reshape(shape = concat_179, x = var_1871_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479063936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481161152))))[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor hidden_states_301_cast_fp16 = add(x = hidden_states_285_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_301_cast_fp16")]; + fp16 var_64_promoted_23_to_fp16 = const()[name = string("op_64_promoted_23_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1880_cast_fp16 = pow(x = hidden_states_301_cast_fp16, y = var_64_promoted_23_to_fp16)[name = string("op_1880_cast_fp16")]; + tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; + bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; + tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1880_cast_fp16)[name = string("variance_47_cast_fp16")]; + fp16 var_1883_to_fp16 = const()[name = string("op_1883_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1884_cast_fp16 = add(x = variance_47_cast_fp16, y = var_1883_to_fp16)[name = string("op_1884_cast_fp16")]; + fp32 var_1885_epsilon_0 = const()[name = string("op_1885_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1885_cast_fp16 = rsqrt(epsilon = var_1885_epsilon_0, x = var_1884_cast_fp16)[name = string("op_1885_cast_fp16")]; + tensor hidden_states_305_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = var_1885_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481423360)))]; + tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_305_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481427520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489816192))))[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_1897_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_1897_cast_fp16")]; + tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490864832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499253504))))[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_1897_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; + tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500302144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508690816))))[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_83_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor hidden_states_311_cast_fp16 = add(x = hidden_states_301_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; + fp16 var_64_promoted_24_to_fp16 = const()[name = string("op_64_promoted_24_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1910_cast_fp16 = pow(x = hidden_states_311_cast_fp16, y = var_64_promoted_24_to_fp16)[name = string("op_1910_cast_fp16")]; + tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; + bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; + tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_1910_cast_fp16)[name = string("variance_49_cast_fp16")]; + fp16 var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1914_cast_fp16 = add(x = variance_49_cast_fp16, y = var_1913_to_fp16)[name = string("op_1914_cast_fp16")]; + fp32 var_1915_epsilon_0 = const()[name = string("op_1915_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1915_cast_fp16 = rsqrt(epsilon = var_1915_epsilon_0, x = var_1914_cast_fp16)[name = string("op_1915_cast_fp16")]; + tensor hidden_states_315_cast_fp16 = mul(x = hidden_states_311_cast_fp16, y = var_1915_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509739456)))]; + tensor hidden_states_319_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_315_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + tensor var_1926_shape_cast_fp16 = shape(x = hidden_states_319_cast_fp16)[name = string("op_1926_shape_cast_fp16")]; + int32 gather_124 = const()[name = string("gather_124"), val = int32(1)]; + int32 gather_125_axis_0 = const()[name = string("gather_125_axis_0"), val = int32(0)]; + int32 gather_125_batch_dims_0 = const()[name = string("gather_125_batch_dims_0"), val = int32(0)]; + bool gather_125_validate_indices_0 = const()[name = string("gather_125_validate_indices_0"), val = bool(false)]; + string var_1926_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1926_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_125_to_uint16 = const()[name = string("select_125_to_uint16"), val = uint16(1)]; + tensor var_1926_shape_cast_fp16_to_uint16 = cast(dtype = var_1926_shape_cast_fp16_to_uint16_dtype_0, x = var_1926_shape_cast_fp16)[name = string("cast_47")]; + uint16 gather_125_cast_uint16 = gather(axis = gather_125_axis_0, batch_dims = gather_125_batch_dims_0, indices = select_125_to_uint16, validate_indices = gather_125_validate_indices_0, x = var_1926_shape_cast_fp16_to_uint16)[name = string("gather_125_cast_uint16")]; + string gather_125_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_125_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509743616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511840832))))[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_319_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512103040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514200256))))[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_85_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_319_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514462464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516559680))))[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_86_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_319_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor concat_180x = const()[name = string("concat_180x"), val = tensor([1, -1, 32, 64])]; + tensor var_1935_cast_fp16 = reshape(shape = concat_180x, x = linear_84_cast_fp16)[name = string("op_1935_cast_fp16")]; + tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_181x = const()[name = string("concat_181x"), val = tensor([1, -1, 32, 64])]; + tensor var_1938_cast_fp16 = reshape(shape = concat_181x, x = linear_85_cast_fp16)[name = string("op_1938_cast_fp16")]; + tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_182x = const()[name = string("concat_182x"), val = tensor([1, -1, 32, 64])]; + tensor var_1941_cast_fp16 = reshape(shape = concat_182x, x = linear_86_cast_fp16)[name = string("op_1941_cast_fp16")]; + tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_1935_cast_fp16)[name = string("transpose_47")]; + tensor var_1945_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1945_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1956_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1956_cast_fp16")]; + bool var_1958_interleave_0 = const()[name = string("op_1958_interleave_0"), val = bool(false)]; + tensor var_1958_cast_fp16 = concat(axis = var_69, interleave = var_1958_interleave_0, values = (var_1956_cast_fp16, x1_49_cast_fp16))[name = string("op_1958_cast_fp16")]; + tensor var_1959_cast_fp16 = mul(x = var_1958_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1959_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_1945_cast_fp16, y = var_1959_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_1938_cast_fp16)[name = string("transpose_46")]; + tensor var_1961_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1961_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1972_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1972_cast_fp16")]; + bool var_1974_interleave_0 = const()[name = string("op_1974_interleave_0"), val = bool(false)]; + tensor var_1974_cast_fp16 = concat(axis = var_69, interleave = var_1974_interleave_0, values = (var_1972_cast_fp16, x1_51_cast_fp16))[name = string("op_1974_cast_fp16")]; + tensor var_1975_cast_fp16 = mul(x = var_1974_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1975_cast_fp16")]; + tensor k_state_25_cast_fp16 = add(x = var_1961_cast_fp16, y = var_1975_cast_fp16)[name = string("k_state_25_cast_fp16")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor concat_185_values0_0 = const()[name = string("concat_185_values0_0"), val = tensor([12])]; + int32 concat_185_axis_0 = const()[name = string("concat_185_axis_0"), val = int32(0)]; + bool concat_185_interleave_0 = const()[name = string("concat_185_interleave_0"), val = bool(false)]; + tensor concat_185 = concat(axis = concat_185_axis_0, interleave = concat_185_interleave_0, values = (concat_185_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_185")]; + tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_185, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; + tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_1941_cast_fp16)[name = string("transpose_45")]; + tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_185, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; + tensor var_1998_begin_0 = const()[name = string("op_1998_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_1998_end_0 = const()[name = string("op_1998_end_0"), val = tensor([13, 1, 32, 2048, 64])]; + tensor var_1998_end_mask_0 = const()[name = string("op_1998_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1998_squeeze_mask_0 = const()[name = string("op_1998_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1998_cast_fp16 = slice_by_index(begin = var_1998_begin_0, end = var_1998_end_0, end_mask = var_1998_end_mask_0, squeeze_mask = var_1998_squeeze_mask_0, x = coreml_update_state_72)[name = string("op_1998_cast_fp16")]; + tensor var_2001_begin_0 = const()[name = string("op_2001_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2001_end_mask_0 = const()[name = string("op_2001_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2001_cast_fp16 = slice_by_index(begin = var_2001_begin_0, end = concat_11, end_mask = var_2001_end_mask_0, x = var_1998_cast_fp16)[name = string("op_2001_cast_fp16")]; + tensor var_2003_begin_0 = const()[name = string("op_2003_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2003_end_0 = const()[name = string("op_2003_end_0"), val = tensor([13, 1, 32, 2048, 64])]; + tensor var_2003_end_mask_0 = const()[name = string("op_2003_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2003_squeeze_mask_0 = const()[name = string("op_2003_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2003_cast_fp16 = slice_by_index(begin = var_2003_begin_0, end = var_2003_end_0, end_mask = var_2003_end_mask_0, squeeze_mask = var_2003_squeeze_mask_0, x = coreml_update_state_73)[name = string("op_2003_cast_fp16")]; + tensor var_2006_begin_0 = const()[name = string("op_2006_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2006_end_mask_0 = const()[name = string("op_2006_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2006_cast_fp16 = slice_by_index(begin = var_2006_begin_0, end = concat_11, end_mask = var_2006_end_mask_0, x = var_2003_cast_fp16)[name = string("op_2006_cast_fp16")]; + tensor var_2008_shape_cast_fp16 = shape(x = var_2001_cast_fp16)[name = string("op_2008_shape_cast_fp16")]; + int32 gather_133_axis_0 = const()[name = string("gather_133_axis_0"), val = int32(0)]; + int32 gather_133_batch_dims_0 = const()[name = string("gather_133_batch_dims_0"), val = int32(0)]; + bool gather_133_validate_indices_0 = const()[name = string("gather_133_validate_indices_0"), val = bool(false)]; + string var_2008_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2008_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_133_to_uint16 = const()[name = string("select_133_to_uint16"), val = uint16(2)]; + tensor var_2008_shape_cast_fp16_to_uint16 = cast(dtype = var_2008_shape_cast_fp16_to_uint16_dtype_0, x = var_2008_shape_cast_fp16)[name = string("cast_46")]; + uint16 gather_133_cast_uint16 = gather(axis = gather_133_axis_0, batch_dims = gather_133_batch_dims_0, indices = select_133_to_uint16, validate_indices = gather_133_validate_indices_0, x = var_2008_shape_cast_fp16_to_uint16)[name = string("gather_133_cast_uint16")]; + string gather_133_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_133_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_193_values0_0 = const()[name = string("concat_193_values0_0"), val = int32(1)]; + int32 concat_193_values1_0 = const()[name = string("concat_193_values1_0"), val = int32(1)]; + int32 concat_193_values2_0 = const()[name = string("concat_193_values2_0"), val = int32(0)]; + int32 concat_193_axis_0 = const()[name = string("concat_193_axis_0"), val = int32(0)]; + bool concat_193_interleave_0 = const()[name = string("concat_193_interleave_0"), val = bool(false)]; + int32 gather_133_cast_uint16_to_int32 = cast(dtype = gather_133_cast_uint16_to_int32_dtype_0, x = gather_133_cast_uint16)[name = string("cast_45")]; + tensor concat_193 = concat(axis = concat_193_axis_0, interleave = concat_193_interleave_0, values = (concat_193_values0_0, concat_193_values1_0, concat_193_values2_0, gather_133_cast_uint16_to_int32))[name = string("concat_193")]; + tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_27_cast_fp16 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_193, end_mask = causal_mask_27_end_mask_0, x = causal_mask)[name = string("causal_mask_27_cast_fp16")]; + tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = var_2001_cast_fp16, query = query_states_51_cast_fp16, value = var_2006_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_2014_perm_0 = const()[name = string("op_2014_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_194_axis_0 = const()[name = string("concat_194_axis_0"), val = int32(0)]; + bool concat_194_interleave_0 = const()[name = string("concat_194_interleave_0"), val = bool(false)]; + int32 gather_125_cast_uint16_to_int32 = cast(dtype = gather_125_cast_uint16_to_int32_dtype_0, x = gather_125_cast_uint16)[name = string("cast_44")]; + tensor concat_194 = concat(axis = concat_194_axis_0, interleave = concat_194_interleave_0, values = (gather_124, gather_125_cast_uint16_to_int32, var_69))[name = string("concat_194")]; + tensor var_2014_cast_fp16 = transpose(perm = var_2014_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_44")]; + tensor input_97_cast_fp16 = reshape(shape = concat_194, x = var_2014_cast_fp16)[name = string("input_97_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516821888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518919104))))[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_87_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor hidden_states_327_cast_fp16 = add(x = hidden_states_311_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_327_cast_fp16")]; + fp16 var_64_promoted_25_to_fp16 = const()[name = string("op_64_promoted_25_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2023_cast_fp16 = pow(x = hidden_states_327_cast_fp16, y = var_64_promoted_25_to_fp16)[name = string("op_2023_cast_fp16")]; + tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; + bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; + tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2023_cast_fp16)[name = string("variance_51_cast_fp16")]; + fp16 var_2026_to_fp16 = const()[name = string("op_2026_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2027_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2026_to_fp16)[name = string("op_2027_cast_fp16")]; + fp32 var_2028_epsilon_0 = const()[name = string("op_2028_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2028_cast_fp16 = rsqrt(epsilon = var_2028_epsilon_0, x = var_2027_cast_fp16)[name = string("op_2028_cast_fp16")]; + tensor hidden_states_331_cast_fp16 = mul(x = hidden_states_327_cast_fp16, y = var_2028_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519181312)))]; + tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_331_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519185472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527574144))))[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2040_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2040_cast_fp16")]; + tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528622784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537011456))))[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor input_103_cast_fp16 = mul(x = var_2040_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538060096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546448768))))[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_90_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor hidden_states_337_cast_fp16 = add(x = hidden_states_327_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; + fp16 var_64_promoted_26_to_fp16 = const()[name = string("op_64_promoted_26_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2053_cast_fp16 = pow(x = hidden_states_337_cast_fp16, y = var_64_promoted_26_to_fp16)[name = string("op_2053_cast_fp16")]; + tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; + bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; + tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2053_cast_fp16)[name = string("variance_53_cast_fp16")]; + fp16 var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2057_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2056_to_fp16)[name = string("op_2057_cast_fp16")]; + fp32 var_2058_epsilon_0 = const()[name = string("op_2058_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2058_cast_fp16 = rsqrt(epsilon = var_2058_epsilon_0, x = var_2057_cast_fp16)[name = string("op_2058_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = var_2058_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547497408)))]; + tensor hidden_states_345_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_341_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; + tensor var_2069_shape_cast_fp16 = shape(x = hidden_states_345_cast_fp16)[name = string("op_2069_shape_cast_fp16")]; + int32 gather_134 = const()[name = string("gather_134"), val = int32(1)]; + int32 gather_135_axis_0 = const()[name = string("gather_135_axis_0"), val = int32(0)]; + int32 gather_135_batch_dims_0 = const()[name = string("gather_135_batch_dims_0"), val = int32(0)]; + bool gather_135_validate_indices_0 = const()[name = string("gather_135_validate_indices_0"), val = bool(false)]; + string var_2069_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2069_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_135_to_uint16 = const()[name = string("select_135_to_uint16"), val = uint16(1)]; + tensor var_2069_shape_cast_fp16_to_uint16 = cast(dtype = var_2069_shape_cast_fp16_to_uint16_dtype_0, x = var_2069_shape_cast_fp16)[name = string("cast_43")]; + uint16 gather_135_cast_uint16 = gather(axis = gather_135_axis_0, batch_dims = gather_135_batch_dims_0, indices = select_135_to_uint16, validate_indices = gather_135_validate_indices_0, x = var_2069_shape_cast_fp16_to_uint16)[name = string("gather_135_cast_uint16")]; + string gather_135_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_135_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547501568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549598784))))[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_345_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549860992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551958208))))[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_92_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_345_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552220416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554317632))))[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_93_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_345_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor concat_195x = const()[name = string("concat_195x"), val = tensor([1, -1, 32, 64])]; + tensor var_2078_cast_fp16 = reshape(shape = concat_195x, x = linear_91_cast_fp16)[name = string("op_2078_cast_fp16")]; + tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_196x = const()[name = string("concat_196x"), val = tensor([1, -1, 32, 64])]; + tensor var_2081_cast_fp16 = reshape(shape = concat_196x, x = linear_92_cast_fp16)[name = string("op_2081_cast_fp16")]; + tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_197x = const()[name = string("concat_197x"), val = tensor([1, -1, 32, 64])]; + tensor var_2084_cast_fp16 = reshape(shape = concat_197x, x = linear_93_cast_fp16)[name = string("op_2084_cast_fp16")]; + tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2078_cast_fp16)[name = string("transpose_43")]; + tensor var_2088_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2088_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2099_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2099_cast_fp16")]; + bool var_2101_interleave_0 = const()[name = string("op_2101_interleave_0"), val = bool(false)]; + tensor var_2101_cast_fp16 = concat(axis = var_69, interleave = var_2101_interleave_0, values = (var_2099_cast_fp16, x1_53_cast_fp16))[name = string("op_2101_cast_fp16")]; + tensor var_2102_cast_fp16 = mul(x = var_2101_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2102_cast_fp16")]; + tensor query_states_55_cast_fp16 = add(x = var_2088_cast_fp16, y = var_2102_cast_fp16)[name = string("query_states_55_cast_fp16")]; + tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2081_cast_fp16)[name = string("transpose_42")]; + tensor var_2104_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2104_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2115_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2115_cast_fp16")]; + bool var_2117_interleave_0 = const()[name = string("op_2117_interleave_0"), val = bool(false)]; + tensor var_2117_cast_fp16 = concat(axis = var_69, interleave = var_2117_interleave_0, values = (var_2115_cast_fp16, x1_55_cast_fp16))[name = string("op_2117_cast_fp16")]; + tensor var_2118_cast_fp16 = mul(x = var_2117_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2118_cast_fp16")]; + tensor k_state_27_cast_fp16 = add(x = var_2104_cast_fp16, y = var_2118_cast_fp16)[name = string("k_state_27_cast_fp16")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor concat_200_values0_0 = const()[name = string("concat_200_values0_0"), val = tensor([13])]; + int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; + bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; + tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (concat_200_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_200")]; + tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_200, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; + tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2084_cast_fp16)[name = string("transpose_41")]; + tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_200, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; + tensor var_2141_begin_0 = const()[name = string("op_2141_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2141_end_0 = const()[name = string("op_2141_end_0"), val = tensor([14, 1, 32, 2048, 64])]; + tensor var_2141_end_mask_0 = const()[name = string("op_2141_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2141_squeeze_mask_0 = const()[name = string("op_2141_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2141_cast_fp16 = slice_by_index(begin = var_2141_begin_0, end = var_2141_end_0, end_mask = var_2141_end_mask_0, squeeze_mask = var_2141_squeeze_mask_0, x = coreml_update_state_74)[name = string("op_2141_cast_fp16")]; + tensor var_2144_begin_0 = const()[name = string("op_2144_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2144_end_mask_0 = const()[name = string("op_2144_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2144_cast_fp16 = slice_by_index(begin = var_2144_begin_0, end = concat_11, end_mask = var_2144_end_mask_0, x = var_2141_cast_fp16)[name = string("op_2144_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = string("op_2146_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2146_end_0 = const()[name = string("op_2146_end_0"), val = tensor([14, 1, 32, 2048, 64])]; + tensor var_2146_end_mask_0 = const()[name = string("op_2146_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2146_squeeze_mask_0 = const()[name = string("op_2146_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, squeeze_mask = var_2146_squeeze_mask_0, x = coreml_update_state_75)[name = string("op_2146_cast_fp16")]; + tensor var_2149_begin_0 = const()[name = string("op_2149_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2149_end_mask_0 = const()[name = string("op_2149_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2149_cast_fp16 = slice_by_index(begin = var_2149_begin_0, end = concat_11, end_mask = var_2149_end_mask_0, x = var_2146_cast_fp16)[name = string("op_2149_cast_fp16")]; + tensor var_2151_shape_cast_fp16 = shape(x = var_2144_cast_fp16)[name = string("op_2151_shape_cast_fp16")]; + int32 gather_143_axis_0 = const()[name = string("gather_143_axis_0"), val = int32(0)]; + int32 gather_143_batch_dims_0 = const()[name = string("gather_143_batch_dims_0"), val = int32(0)]; + bool gather_143_validate_indices_0 = const()[name = string("gather_143_validate_indices_0"), val = bool(false)]; + string var_2151_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2151_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_143_to_uint16 = const()[name = string("select_143_to_uint16"), val = uint16(2)]; + tensor var_2151_shape_cast_fp16_to_uint16 = cast(dtype = var_2151_shape_cast_fp16_to_uint16_dtype_0, x = var_2151_shape_cast_fp16)[name = string("cast_42")]; + uint16 gather_143_cast_uint16 = gather(axis = gather_143_axis_0, batch_dims = gather_143_batch_dims_0, indices = select_143_to_uint16, validate_indices = gather_143_validate_indices_0, x = var_2151_shape_cast_fp16_to_uint16)[name = string("gather_143_cast_uint16")]; + string gather_143_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_143_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; + int32 concat_208_values1_0 = const()[name = string("concat_208_values1_0"), val = int32(1)]; + int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(0)]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + int32 gather_143_cast_uint16_to_int32 = cast(dtype = gather_143_cast_uint16_to_int32_dtype_0, x = gather_143_cast_uint16)[name = string("cast_41")]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, concat_208_values1_0, concat_208_values2_0, gather_143_cast_uint16_to_int32))[name = string("concat_208")]; + tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_29_cast_fp16 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_208, end_mask = causal_mask_29_end_mask_0, x = causal_mask)[name = string("causal_mask_29_cast_fp16")]; + tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = var_2144_cast_fp16, query = query_states_55_cast_fp16, value = var_2149_cast_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor var_2157_perm_0 = const()[name = string("op_2157_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_209_axis_0 = const()[name = string("concat_209_axis_0"), val = int32(0)]; + bool concat_209_interleave_0 = const()[name = string("concat_209_interleave_0"), val = bool(false)]; + int32 gather_135_cast_uint16_to_int32 = cast(dtype = gather_135_cast_uint16_to_int32_dtype_0, x = gather_135_cast_uint16)[name = string("cast_40")]; + tensor concat_209 = concat(axis = concat_209_axis_0, interleave = concat_209_interleave_0, values = (gather_134, gather_135_cast_uint16_to_int32, var_69))[name = string("concat_209")]; + tensor var_2157_cast_fp16 = transpose(perm = var_2157_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_40")]; + tensor input_105_cast_fp16 = reshape(shape = concat_209, x = var_2157_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554579840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556677056))))[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_94_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_94_cast_fp16")]; + tensor hidden_states_353_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; + fp16 var_64_promoted_27_to_fp16 = const()[name = string("op_64_promoted_27_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2166_cast_fp16 = pow(x = hidden_states_353_cast_fp16, y = var_64_promoted_27_to_fp16)[name = string("op_2166_cast_fp16")]; + tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; + bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; + tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2166_cast_fp16)[name = string("variance_55_cast_fp16")]; + fp16 var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2170_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2169_to_fp16)[name = string("op_2170_cast_fp16")]; + fp32 var_2171_epsilon_0 = const()[name = string("op_2171_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2171_cast_fp16 = rsqrt(epsilon = var_2171_epsilon_0, x = var_2170_cast_fp16)[name = string("op_2171_cast_fp16")]; + tensor hidden_states_357_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = var_2171_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556939264)))]; + tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_357_cast_fp16)[name = string("input_107_cast_fp16")]; + tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556943424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565332096))))[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor var_2183_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2183_cast_fp16")]; + tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566380736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574769408))))[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor input_111_cast_fp16 = mul(x = var_2183_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; + tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575818048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584206720))))[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_97_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor hidden_states_363_cast_fp16 = add(x = hidden_states_353_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; + fp16 var_64_promoted_28_to_fp16 = const()[name = string("op_64_promoted_28_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2196_cast_fp16 = pow(x = hidden_states_363_cast_fp16, y = var_64_promoted_28_to_fp16)[name = string("op_2196_cast_fp16")]; + tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; + bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; + tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2196_cast_fp16)[name = string("variance_57_cast_fp16")]; + fp16 var_2199_to_fp16 = const()[name = string("op_2199_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2200_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2199_to_fp16)[name = string("op_2200_cast_fp16")]; + fp32 var_2201_epsilon_0 = const()[name = string("op_2201_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2201_cast_fp16 = rsqrt(epsilon = var_2201_epsilon_0, x = var_2200_cast_fp16)[name = string("op_2201_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = mul(x = hidden_states_363_cast_fp16, y = var_2201_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585255360)))]; + tensor hidden_states_371_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor var_2212_shape_cast_fp16 = shape(x = hidden_states_371_cast_fp16)[name = string("op_2212_shape_cast_fp16")]; + int32 gather_144 = const()[name = string("gather_144"), val = int32(1)]; + int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; + int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; + bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; + string var_2212_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2212_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(1)]; + tensor var_2212_shape_cast_fp16_to_uint16 = cast(dtype = var_2212_shape_cast_fp16_to_uint16_dtype_0, x = var_2212_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_2212_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; + string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585259520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587356736))))[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_371_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587618944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589716160))))[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_99_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_371_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589978368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592075584))))[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_100_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_371_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 32, 64])]; + tensor var_2221_cast_fp16 = reshape(shape = concat_210x, x = linear_98_cast_fp16)[name = string("op_2221_cast_fp16")]; + tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 32, 64])]; + tensor var_2224_cast_fp16 = reshape(shape = concat_211x, x = linear_99_cast_fp16)[name = string("op_2224_cast_fp16")]; + tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 32, 64])]; + tensor var_2227_cast_fp16 = reshape(shape = concat_212x, x = linear_100_cast_fp16)[name = string("op_2227_cast_fp16")]; + tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2221_cast_fp16)[name = string("transpose_39")]; + tensor var_2231_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2231_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2242_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2242_cast_fp16")]; + bool var_2244_interleave_0 = const()[name = string("op_2244_interleave_0"), val = bool(false)]; + tensor var_2244_cast_fp16 = concat(axis = var_69, interleave = var_2244_interleave_0, values = (var_2242_cast_fp16, x1_57_cast_fp16))[name = string("op_2244_cast_fp16")]; + tensor var_2245_cast_fp16 = mul(x = var_2244_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2245_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_2231_cast_fp16, y = var_2245_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2224_cast_fp16)[name = string("transpose_38")]; + tensor var_2247_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2247_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2258_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2258_cast_fp16")]; + bool var_2260_interleave_0 = const()[name = string("op_2260_interleave_0"), val = bool(false)]; + tensor var_2260_cast_fp16 = concat(axis = var_69, interleave = var_2260_interleave_0, values = (var_2258_cast_fp16, x1_59_cast_fp16))[name = string("op_2260_cast_fp16")]; + tensor var_2261_cast_fp16 = mul(x = var_2260_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2261_cast_fp16")]; + tensor k_state_29_cast_fp16 = add(x = var_2247_cast_fp16, y = var_2261_cast_fp16)[name = string("k_state_29_cast_fp16")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor concat_215_values0_0 = const()[name = string("concat_215_values0_0"), val = tensor([14])]; + int32 concat_215_axis_0 = const()[name = string("concat_215_axis_0"), val = int32(0)]; + bool concat_215_interleave_0 = const()[name = string("concat_215_interleave_0"), val = bool(false)]; + tensor concat_215 = concat(axis = concat_215_axis_0, interleave = concat_215_interleave_0, values = (concat_215_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_215")]; + tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_215, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; + tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_2227_cast_fp16)[name = string("transpose_37")]; + tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_215, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; + tensor var_2284_begin_0 = const()[name = string("op_2284_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2284_end_0 = const()[name = string("op_2284_end_0"), val = tensor([15, 1, 32, 2048, 64])]; + tensor var_2284_end_mask_0 = const()[name = string("op_2284_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2284_squeeze_mask_0 = const()[name = string("op_2284_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, squeeze_mask = var_2284_squeeze_mask_0, x = coreml_update_state_76)[name = string("op_2284_cast_fp16")]; + tensor var_2287_begin_0 = const()[name = string("op_2287_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2287_end_mask_0 = const()[name = string("op_2287_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2287_cast_fp16 = slice_by_index(begin = var_2287_begin_0, end = concat_11, end_mask = var_2287_end_mask_0, x = var_2284_cast_fp16)[name = string("op_2287_cast_fp16")]; + tensor var_2289_begin_0 = const()[name = string("op_2289_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2289_end_0 = const()[name = string("op_2289_end_0"), val = tensor([15, 1, 32, 2048, 64])]; + tensor var_2289_end_mask_0 = const()[name = string("op_2289_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2289_squeeze_mask_0 = const()[name = string("op_2289_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2289_cast_fp16 = slice_by_index(begin = var_2289_begin_0, end = var_2289_end_0, end_mask = var_2289_end_mask_0, squeeze_mask = var_2289_squeeze_mask_0, x = coreml_update_state_77)[name = string("op_2289_cast_fp16")]; + tensor var_2292_begin_0 = const()[name = string("op_2292_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2292_end_mask_0 = const()[name = string("op_2292_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2292_cast_fp16 = slice_by_index(begin = var_2292_begin_0, end = concat_11, end_mask = var_2292_end_mask_0, x = var_2289_cast_fp16)[name = string("op_2292_cast_fp16")]; + tensor var_2294_shape_cast_fp16 = shape(x = var_2287_cast_fp16)[name = string("op_2294_shape_cast_fp16")]; + int32 gather_153_axis_0 = const()[name = string("gather_153_axis_0"), val = int32(0)]; + int32 gather_153_batch_dims_0 = const()[name = string("gather_153_batch_dims_0"), val = int32(0)]; + bool gather_153_validate_indices_0 = const()[name = string("gather_153_validate_indices_0"), val = bool(false)]; + string var_2294_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2294_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_153_to_uint16 = const()[name = string("select_153_to_uint16"), val = uint16(2)]; + tensor var_2294_shape_cast_fp16_to_uint16 = cast(dtype = var_2294_shape_cast_fp16_to_uint16_dtype_0, x = var_2294_shape_cast_fp16)[name = string("cast_38")]; + uint16 gather_153_cast_uint16 = gather(axis = gather_153_axis_0, batch_dims = gather_153_batch_dims_0, indices = select_153_to_uint16, validate_indices = gather_153_validate_indices_0, x = var_2294_shape_cast_fp16_to_uint16)[name = string("gather_153_cast_uint16")]; + string gather_153_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_153_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_223_values0_0 = const()[name = string("concat_223_values0_0"), val = int32(1)]; + int32 concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = int32(1)]; + int32 concat_223_values2_0 = const()[name = string("concat_223_values2_0"), val = int32(0)]; + int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; + bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; + int32 gather_153_cast_uint16_to_int32 = cast(dtype = gather_153_cast_uint16_to_int32_dtype_0, x = gather_153_cast_uint16)[name = string("cast_37")]; + tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (concat_223_values0_0, concat_223_values1_0, concat_223_values2_0, gather_153_cast_uint16_to_int32))[name = string("concat_223")]; + tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_31_cast_fp16 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_223, end_mask = causal_mask_31_end_mask_0, x = causal_mask)[name = string("causal_mask_31_cast_fp16")]; + tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = var_2287_cast_fp16, query = query_states_59_cast_fp16, value = var_2292_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_2300_perm_0 = const()[name = string("op_2300_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_36")]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_144, gather_145_cast_uint16_to_int32, var_69))[name = string("concat_224")]; + tensor var_2300_cast_fp16 = transpose(perm = var_2300_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_36")]; + tensor input_113_cast_fp16 = reshape(shape = concat_224, x = var_2300_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592337792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594435008))))[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor hidden_states_379_cast_fp16 = add(x = hidden_states_363_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; + fp16 var_64_promoted_29_to_fp16 = const()[name = string("op_64_promoted_29_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2309_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_64_promoted_29_to_fp16)[name = string("op_2309_cast_fp16")]; + tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; + bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; + tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2309_cast_fp16)[name = string("variance_59_cast_fp16")]; + fp16 var_2312_to_fp16 = const()[name = string("op_2312_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2313_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2312_to_fp16)[name = string("op_2313_cast_fp16")]; + fp32 var_2314_epsilon_0 = const()[name = string("op_2314_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2314_cast_fp16 = rsqrt(epsilon = var_2314_epsilon_0, x = var_2313_cast_fp16)[name = string("op_2314_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_2314_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594697216)))]; + tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_115_cast_fp16")]; + tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594701376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603090048))))[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_2326_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2326_cast_fp16")]; + tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(604138688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612527360))))[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_2326_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613576000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621964672))))[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_104_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + fp16 var_64_promoted_30_to_fp16 = const()[name = string("op_64_promoted_30_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2339_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_64_promoted_30_to_fp16)[name = string("op_2339_cast_fp16")]; + tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; + bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; + tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2339_cast_fp16)[name = string("variance_61_cast_fp16")]; + fp16 var_2342_to_fp16 = const()[name = string("op_2342_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2343_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2342_to_fp16)[name = string("op_2343_cast_fp16")]; + fp32 var_2344_epsilon_0 = const()[name = string("op_2344_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2344_cast_fp16 = rsqrt(epsilon = var_2344_epsilon_0, x = var_2343_cast_fp16)[name = string("op_2344_cast_fp16")]; + tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2344_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623013312)))]; + tensor hidden_states_397_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_393_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; + tensor var_2355_shape_cast_fp16 = shape(x = hidden_states_397_cast_fp16)[name = string("op_2355_shape_cast_fp16")]; + int32 gather_154 = const()[name = string("gather_154"), val = int32(1)]; + int32 gather_155_axis_0 = const()[name = string("gather_155_axis_0"), val = int32(0)]; + int32 gather_155_batch_dims_0 = const()[name = string("gather_155_batch_dims_0"), val = int32(0)]; + bool gather_155_validate_indices_0 = const()[name = string("gather_155_validate_indices_0"), val = bool(false)]; + string var_2355_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2355_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_155_to_uint16 = const()[name = string("select_155_to_uint16"), val = uint16(1)]; + tensor var_2355_shape_cast_fp16_to_uint16 = cast(dtype = var_2355_shape_cast_fp16_to_uint16_dtype_0, x = var_2355_shape_cast_fp16)[name = string("cast_35")]; + uint16 gather_155_cast_uint16 = gather(axis = gather_155_axis_0, batch_dims = gather_155_batch_dims_0, indices = select_155_to_uint16, validate_indices = gather_155_validate_indices_0, x = var_2355_shape_cast_fp16_to_uint16)[name = string("gather_155_cast_uint16")]; + string gather_155_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_155_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623017472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625114688))))[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625376896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627474112))))[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_106_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627736320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629833536))))[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_107_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, -1, 32, 64])]; + tensor var_2364_cast_fp16 = reshape(shape = concat_225x, x = linear_105_cast_fp16)[name = string("op_2364_cast_fp16")]; + tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_226x = const()[name = string("concat_226x"), val = tensor([1, -1, 32, 64])]; + tensor var_2367_cast_fp16 = reshape(shape = concat_226x, x = linear_106_cast_fp16)[name = string("op_2367_cast_fp16")]; + tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_227x = const()[name = string("concat_227x"), val = tensor([1, -1, 32, 64])]; + tensor var_2370_cast_fp16 = reshape(shape = concat_227x, x = linear_107_cast_fp16)[name = string("op_2370_cast_fp16")]; + tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_2364_cast_fp16)[name = string("transpose_35")]; + tensor var_2374_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2374_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2385_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2385_cast_fp16")]; + bool var_2387_interleave_0 = const()[name = string("op_2387_interleave_0"), val = bool(false)]; + tensor var_2387_cast_fp16 = concat(axis = var_69, interleave = var_2387_interleave_0, values = (var_2385_cast_fp16, x1_61_cast_fp16))[name = string("op_2387_cast_fp16")]; + tensor var_2388_cast_fp16 = mul(x = var_2387_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2388_cast_fp16")]; + tensor query_states_63_cast_fp16 = add(x = var_2374_cast_fp16, y = var_2388_cast_fp16)[name = string("query_states_63_cast_fp16")]; + tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_2367_cast_fp16)[name = string("transpose_34")]; + tensor var_2390_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2390_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2401_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2401_cast_fp16")]; + bool var_2403_interleave_0 = const()[name = string("op_2403_interleave_0"), val = bool(false)]; + tensor var_2403_cast_fp16 = concat(axis = var_69, interleave = var_2403_interleave_0, values = (var_2401_cast_fp16, x1_63_cast_fp16))[name = string("op_2403_cast_fp16")]; + tensor var_2404_cast_fp16 = mul(x = var_2403_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2404_cast_fp16")]; + tensor k_state_31_cast_fp16 = add(x = var_2390_cast_fp16, y = var_2404_cast_fp16)[name = string("k_state_31_cast_fp16")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = tensor([15])]; + int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; + bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; + tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_230")]; + tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_230, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; + tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_31_cast_fp16 = transpose(perm = v_state_31_perm_0, x = var_2370_cast_fp16)[name = string("transpose_33")]; + tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_230, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; + tensor var_2427_begin_0 = const()[name = string("op_2427_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2427_end_0 = const()[name = string("op_2427_end_0"), val = tensor([16, 1, 32, 2048, 64])]; + tensor var_2427_end_mask_0 = const()[name = string("op_2427_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2427_squeeze_mask_0 = const()[name = string("op_2427_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2427_cast_fp16 = slice_by_index(begin = var_2427_begin_0, end = var_2427_end_0, end_mask = var_2427_end_mask_0, squeeze_mask = var_2427_squeeze_mask_0, x = coreml_update_state_78)[name = string("op_2427_cast_fp16")]; + tensor var_2430_begin_0 = const()[name = string("op_2430_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2430_end_mask_0 = const()[name = string("op_2430_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2430_cast_fp16 = slice_by_index(begin = var_2430_begin_0, end = concat_11, end_mask = var_2430_end_mask_0, x = var_2427_cast_fp16)[name = string("op_2430_cast_fp16")]; + tensor var_2432_begin_0 = const()[name = string("op_2432_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2432_end_0 = const()[name = string("op_2432_end_0"), val = tensor([16, 1, 32, 2048, 64])]; + tensor var_2432_end_mask_0 = const()[name = string("op_2432_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2432_squeeze_mask_0 = const()[name = string("op_2432_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2432_cast_fp16 = slice_by_index(begin = var_2432_begin_0, end = var_2432_end_0, end_mask = var_2432_end_mask_0, squeeze_mask = var_2432_squeeze_mask_0, x = coreml_update_state_79)[name = string("op_2432_cast_fp16")]; + tensor var_2435_begin_0 = const()[name = string("op_2435_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2435_end_mask_0 = const()[name = string("op_2435_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2435_cast_fp16 = slice_by_index(begin = var_2435_begin_0, end = concat_11, end_mask = var_2435_end_mask_0, x = var_2432_cast_fp16)[name = string("op_2435_cast_fp16")]; + tensor var_2437_shape_cast_fp16 = shape(x = var_2430_cast_fp16)[name = string("op_2437_shape_cast_fp16")]; + int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; + int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; + bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; + string var_2437_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2437_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; + tensor var_2437_shape_cast_fp16_to_uint16 = cast(dtype = var_2437_shape_cast_fp16_to_uint16_dtype_0, x = var_2437_shape_cast_fp16)[name = string("cast_34")]; + uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_2437_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; + string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_238_values0_0 = const()[name = string("concat_238_values0_0"), val = int32(1)]; + int32 concat_238_values1_0 = const()[name = string("concat_238_values1_0"), val = int32(1)]; + int32 concat_238_values2_0 = const()[name = string("concat_238_values2_0"), val = int32(0)]; + int32 concat_238_axis_0 = const()[name = string("concat_238_axis_0"), val = int32(0)]; + bool concat_238_interleave_0 = const()[name = string("concat_238_interleave_0"), val = bool(false)]; + int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_33")]; + tensor concat_238 = concat(axis = concat_238_axis_0, interleave = concat_238_interleave_0, values = (concat_238_values0_0, concat_238_values1_0, concat_238_values2_0, gather_163_cast_uint16_to_int32))[name = string("concat_238")]; + tensor causal_mask_33_begin_0 = const()[name = string("causal_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_33_end_mask_0 = const()[name = string("causal_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_33_cast_fp16 = slice_by_index(begin = causal_mask_33_begin_0, end = concat_238, end_mask = causal_mask_33_end_mask_0, x = causal_mask)[name = string("causal_mask_33_cast_fp16")]; + tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_33_cast_fp16, key = var_2430_cast_fp16, query = query_states_63_cast_fp16, value = var_2435_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_2443_perm_0 = const()[name = string("op_2443_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_239_axis_0 = const()[name = string("concat_239_axis_0"), val = int32(0)]; + bool concat_239_interleave_0 = const()[name = string("concat_239_interleave_0"), val = bool(false)]; + int32 gather_155_cast_uint16_to_int32 = cast(dtype = gather_155_cast_uint16_to_int32_dtype_0, x = gather_155_cast_uint16)[name = string("cast_32")]; + tensor concat_239 = concat(axis = concat_239_axis_0, interleave = concat_239_interleave_0, values = (gather_154, gather_155_cast_uint16_to_int32, var_69))[name = string("concat_239")]; + tensor var_2443_cast_fp16 = transpose(perm = var_2443_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_32")]; + tensor input_121_cast_fp16 = reshape(shape = concat_239, x = var_2443_cast_fp16)[name = string("input_121_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630095744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632192960))))[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_108_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor hidden_states_405_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + fp16 var_64_promoted_31_to_fp16 = const()[name = string("op_64_promoted_31_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2452_cast_fp16 = pow(x = hidden_states_405_cast_fp16, y = var_64_promoted_31_to_fp16)[name = string("op_2452_cast_fp16")]; + tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; + bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; + tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_2452_cast_fp16)[name = string("variance_63_cast_fp16")]; + fp16 var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2456_cast_fp16 = add(x = variance_63_cast_fp16, y = var_2455_to_fp16)[name = string("op_2456_cast_fp16")]; + fp32 var_2457_epsilon_0 = const()[name = string("op_2457_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2457_cast_fp16 = rsqrt(epsilon = var_2457_epsilon_0, x = var_2456_cast_fp16)[name = string("op_2457_cast_fp16")]; + tensor hidden_states_409_cast_fp16 = mul(x = hidden_states_405_cast_fp16, y = var_2457_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632455168)))]; + tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_409_cast_fp16)[name = string("input_123_cast_fp16")]; + tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632459328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640848000))))[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_2469_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_2469_cast_fp16")]; + tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641896640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650285312))))[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor input_127_cast_fp16 = mul(x = var_2469_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; + tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651333952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659722624))))[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_111_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor hidden_states_415_cast_fp16 = add(x = hidden_states_405_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_415_cast_fp16")]; + fp16 var_64_promoted_32_to_fp16 = const()[name = string("op_64_promoted_32_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2482_cast_fp16 = pow(x = hidden_states_415_cast_fp16, y = var_64_promoted_32_to_fp16)[name = string("op_2482_cast_fp16")]; + tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; + bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; + tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_2482_cast_fp16)[name = string("variance_65_cast_fp16")]; + fp16 var_2485_to_fp16 = const()[name = string("op_2485_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2486_cast_fp16 = add(x = variance_65_cast_fp16, y = var_2485_to_fp16)[name = string("op_2486_cast_fp16")]; + fp32 var_2487_epsilon_0 = const()[name = string("op_2487_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2487_cast_fp16 = rsqrt(epsilon = var_2487_epsilon_0, x = var_2486_cast_fp16)[name = string("op_2487_cast_fp16")]; + tensor hidden_states_419_cast_fp16 = mul(x = hidden_states_415_cast_fp16, y = var_2487_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; + tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660771264)))]; + tensor hidden_states_423_cast_fp16 = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_419_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; + tensor var_2498_shape_cast_fp16 = shape(x = hidden_states_423_cast_fp16)[name = string("op_2498_shape_cast_fp16")]; + int32 gather_164 = const()[name = string("gather_164"), val = int32(1)]; + int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; + int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; + bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; + string var_2498_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2498_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(1)]; + tensor var_2498_shape_cast_fp16_to_uint16 = cast(dtype = var_2498_shape_cast_fp16_to_uint16_dtype_0, x = var_2498_shape_cast_fp16)[name = string("cast_31")]; + uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_2498_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; + string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660775424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662872640))))[name = string("model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_423_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663134848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665232064))))[name = string("model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_113_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_423_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665494272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667591488))))[name = string("model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_114_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_423_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor concat_240x = const()[name = string("concat_240x"), val = tensor([1, -1, 32, 64])]; + tensor var_2507_cast_fp16 = reshape(shape = concat_240x, x = linear_112_cast_fp16)[name = string("op_2507_cast_fp16")]; + tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_241x = const()[name = string("concat_241x"), val = tensor([1, -1, 32, 64])]; + tensor var_2510_cast_fp16 = reshape(shape = concat_241x, x = linear_113_cast_fp16)[name = string("op_2510_cast_fp16")]; + tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 32, 64])]; + tensor var_2513_cast_fp16 = reshape(shape = concat_242x, x = linear_114_cast_fp16)[name = string("op_2513_cast_fp16")]; + tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_2507_cast_fp16)[name = string("transpose_31")]; + tensor var_2517_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2517_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2528_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2528_cast_fp16")]; + bool var_2530_interleave_0 = const()[name = string("op_2530_interleave_0"), val = bool(false)]; + tensor var_2530_cast_fp16 = concat(axis = var_69, interleave = var_2530_interleave_0, values = (var_2528_cast_fp16, x1_65_cast_fp16))[name = string("op_2530_cast_fp16")]; + tensor var_2531_cast_fp16 = mul(x = var_2530_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2531_cast_fp16")]; + tensor query_states_67_cast_fp16 = add(x = var_2517_cast_fp16, y = var_2531_cast_fp16)[name = string("query_states_67_cast_fp16")]; + tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_2510_cast_fp16)[name = string("transpose_30")]; + tensor var_2533_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2533_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2544_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2544_cast_fp16")]; + bool var_2546_interleave_0 = const()[name = string("op_2546_interleave_0"), val = bool(false)]; + tensor var_2546_cast_fp16 = concat(axis = var_69, interleave = var_2546_interleave_0, values = (var_2544_cast_fp16, x1_67_cast_fp16))[name = string("op_2546_cast_fp16")]; + tensor var_2547_cast_fp16 = mul(x = var_2546_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2547_cast_fp16")]; + tensor k_state_33_cast_fp16 = add(x = var_2533_cast_fp16, y = var_2547_cast_fp16)[name = string("k_state_33_cast_fp16")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = tensor([16])]; + int32 concat_245_axis_0 = const()[name = string("concat_245_axis_0"), val = int32(0)]; + bool concat_245_interleave_0 = const()[name = string("concat_245_interleave_0"), val = bool(false)]; + tensor concat_245 = concat(axis = concat_245_axis_0, interleave = concat_245_interleave_0, values = (concat_245_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_245")]; + tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_245, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; + tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_33_cast_fp16 = transpose(perm = v_state_33_perm_0, x = var_2513_cast_fp16)[name = string("transpose_29")]; + tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_245, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; + tensor var_2570_begin_0 = const()[name = string("op_2570_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_2570_end_0 = const()[name = string("op_2570_end_0"), val = tensor([17, 1, 32, 2048, 64])]; + tensor var_2570_end_mask_0 = const()[name = string("op_2570_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2570_squeeze_mask_0 = const()[name = string("op_2570_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2570_cast_fp16 = slice_by_index(begin = var_2570_begin_0, end = var_2570_end_0, end_mask = var_2570_end_mask_0, squeeze_mask = var_2570_squeeze_mask_0, x = coreml_update_state_80)[name = string("op_2570_cast_fp16")]; + tensor var_2573_begin_0 = const()[name = string("op_2573_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2573_end_mask_0 = const()[name = string("op_2573_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2573_cast_fp16 = slice_by_index(begin = var_2573_begin_0, end = concat_11, end_mask = var_2573_end_mask_0, x = var_2570_cast_fp16)[name = string("op_2573_cast_fp16")]; + tensor var_2575_begin_0 = const()[name = string("op_2575_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_2575_end_0 = const()[name = string("op_2575_end_0"), val = tensor([17, 1, 32, 2048, 64])]; + tensor var_2575_end_mask_0 = const()[name = string("op_2575_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2575_squeeze_mask_0 = const()[name = string("op_2575_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2575_cast_fp16 = slice_by_index(begin = var_2575_begin_0, end = var_2575_end_0, end_mask = var_2575_end_mask_0, squeeze_mask = var_2575_squeeze_mask_0, x = coreml_update_state_81)[name = string("op_2575_cast_fp16")]; + tensor var_2578_begin_0 = const()[name = string("op_2578_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2578_end_mask_0 = const()[name = string("op_2578_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2578_cast_fp16 = slice_by_index(begin = var_2578_begin_0, end = concat_11, end_mask = var_2578_end_mask_0, x = var_2575_cast_fp16)[name = string("op_2578_cast_fp16")]; + tensor var_2580_shape_cast_fp16 = shape(x = var_2573_cast_fp16)[name = string("op_2580_shape_cast_fp16")]; + int32 gather_173_axis_0 = const()[name = string("gather_173_axis_0"), val = int32(0)]; + int32 gather_173_batch_dims_0 = const()[name = string("gather_173_batch_dims_0"), val = int32(0)]; + bool gather_173_validate_indices_0 = const()[name = string("gather_173_validate_indices_0"), val = bool(false)]; + string var_2580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_173_to_uint16 = const()[name = string("select_173_to_uint16"), val = uint16(2)]; + tensor var_2580_shape_cast_fp16_to_uint16 = cast(dtype = var_2580_shape_cast_fp16_to_uint16_dtype_0, x = var_2580_shape_cast_fp16)[name = string("cast_30")]; + uint16 gather_173_cast_uint16 = gather(axis = gather_173_axis_0, batch_dims = gather_173_batch_dims_0, indices = select_173_to_uint16, validate_indices = gather_173_validate_indices_0, x = var_2580_shape_cast_fp16_to_uint16)[name = string("gather_173_cast_uint16")]; + string gather_173_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_173_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_253_values0_0 = const()[name = string("concat_253_values0_0"), val = int32(1)]; + int32 concat_253_values1_0 = const()[name = string("concat_253_values1_0"), val = int32(1)]; + int32 concat_253_values2_0 = const()[name = string("concat_253_values2_0"), val = int32(0)]; + int32 concat_253_axis_0 = const()[name = string("concat_253_axis_0"), val = int32(0)]; + bool concat_253_interleave_0 = const()[name = string("concat_253_interleave_0"), val = bool(false)]; + int32 gather_173_cast_uint16_to_int32 = cast(dtype = gather_173_cast_uint16_to_int32_dtype_0, x = gather_173_cast_uint16)[name = string("cast_29")]; + tensor concat_253 = concat(axis = concat_253_axis_0, interleave = concat_253_interleave_0, values = (concat_253_values0_0, concat_253_values1_0, concat_253_values2_0, gather_173_cast_uint16_to_int32))[name = string("concat_253")]; + tensor causal_mask_35_begin_0 = const()[name = string("causal_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_35_end_mask_0 = const()[name = string("causal_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_35_cast_fp16 = slice_by_index(begin = causal_mask_35_begin_0, end = concat_253, end_mask = causal_mask_35_end_mask_0, x = causal_mask)[name = string("causal_mask_35_cast_fp16")]; + tensor attn_output_65_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_35_cast_fp16, key = var_2573_cast_fp16, query = query_states_67_cast_fp16, value = var_2578_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_2586_perm_0 = const()[name = string("op_2586_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; + bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; + int32 gather_165_cast_uint16_to_int32 = cast(dtype = gather_165_cast_uint16_to_int32_dtype_0, x = gather_165_cast_uint16)[name = string("cast_28")]; + tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (gather_164, gather_165_cast_uint16_to_int32, var_69))[name = string("concat_254")]; + tensor var_2586_cast_fp16 = transpose(perm = var_2586_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_28")]; + tensor input_129_cast_fp16 = reshape(shape = concat_254, x = var_2586_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(667853696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669950912))))[name = string("model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_115_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor hidden_states_431_cast_fp16 = add(x = hidden_states_415_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; + fp16 var_64_promoted_33_to_fp16 = const()[name = string("op_64_promoted_33_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2595_cast_fp16 = pow(x = hidden_states_431_cast_fp16, y = var_64_promoted_33_to_fp16)[name = string("op_2595_cast_fp16")]; + tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; + bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; + tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_2595_cast_fp16)[name = string("variance_67_cast_fp16")]; + fp16 var_2598_to_fp16 = const()[name = string("op_2598_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2599_cast_fp16 = add(x = variance_67_cast_fp16, y = var_2598_to_fp16)[name = string("op_2599_cast_fp16")]; + fp32 var_2600_epsilon_0 = const()[name = string("op_2600_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2600_cast_fp16 = rsqrt(epsilon = var_2600_epsilon_0, x = var_2599_cast_fp16)[name = string("op_2600_cast_fp16")]; + tensor hidden_states_435_cast_fp16 = mul(x = hidden_states_431_cast_fp16, y = var_2600_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; + tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670213120)))]; + tensor input_131_cast_fp16 = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_435_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670217280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(678605952))))[name = string("model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_2612_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_2612_cast_fp16")]; + tensor model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679654592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688043264))))[name = string("model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_2612_cast_fp16, y = linear_117_cast_fp16)[name = string("input_135_cast_fp16")]; + tensor model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689091904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697480576))))[name = string("model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_118_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = string("linear_118_cast_fp16")]; + tensor hidden_states_441_cast_fp16 = add(x = hidden_states_431_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_441_cast_fp16")]; + fp16 var_64_promoted_34_to_fp16 = const()[name = string("op_64_promoted_34_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2625_cast_fp16 = pow(x = hidden_states_441_cast_fp16, y = var_64_promoted_34_to_fp16)[name = string("op_2625_cast_fp16")]; + tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; + bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; + tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_2625_cast_fp16)[name = string("variance_69_cast_fp16")]; + fp16 var_2628_to_fp16 = const()[name = string("op_2628_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2629_cast_fp16 = add(x = variance_69_cast_fp16, y = var_2628_to_fp16)[name = string("op_2629_cast_fp16")]; + fp32 var_2630_epsilon_0 = const()[name = string("op_2630_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2630_cast_fp16 = rsqrt(epsilon = var_2630_epsilon_0, x = var_2629_cast_fp16)[name = string("op_2630_cast_fp16")]; + tensor hidden_states_445_cast_fp16 = mul(x = hidden_states_441_cast_fp16, y = var_2630_cast_fp16)[name = string("hidden_states_445_cast_fp16")]; + tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698529216)))]; + tensor hidden_states_449_cast_fp16 = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_445_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; + tensor var_2641_shape_cast_fp16 = shape(x = hidden_states_449_cast_fp16)[name = string("op_2641_shape_cast_fp16")]; + int32 gather_174 = const()[name = string("gather_174"), val = int32(1)]; + int32 gather_175_axis_0 = const()[name = string("gather_175_axis_0"), val = int32(0)]; + int32 gather_175_batch_dims_0 = const()[name = string("gather_175_batch_dims_0"), val = int32(0)]; + bool gather_175_validate_indices_0 = const()[name = string("gather_175_validate_indices_0"), val = bool(false)]; + string var_2641_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2641_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_175_to_uint16 = const()[name = string("select_175_to_uint16"), val = uint16(1)]; + tensor var_2641_shape_cast_fp16_to_uint16 = cast(dtype = var_2641_shape_cast_fp16_to_uint16_dtype_0, x = var_2641_shape_cast_fp16)[name = string("cast_27")]; + uint16 gather_175_cast_uint16 = gather(axis = gather_175_axis_0, batch_dims = gather_175_batch_dims_0, indices = select_175_to_uint16, validate_indices = gather_175_validate_indices_0, x = var_2641_shape_cast_fp16_to_uint16)[name = string("gather_175_cast_uint16")]; + string gather_175_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_175_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698533376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700630592))))[name = string("model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_449_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700892800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702990016))))[name = string("model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_120_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_449_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(703252224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705349440))))[name = string("model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_121_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_449_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 32, 64])]; + tensor var_2650_cast_fp16 = reshape(shape = concat_255x, x = linear_119_cast_fp16)[name = string("op_2650_cast_fp16")]; + tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 32, 64])]; + tensor var_2653_cast_fp16 = reshape(shape = concat_256x, x = linear_120_cast_fp16)[name = string("op_2653_cast_fp16")]; + tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_257x = const()[name = string("concat_257x"), val = tensor([1, -1, 32, 64])]; + tensor var_2656_cast_fp16 = reshape(shape = concat_257x, x = linear_121_cast_fp16)[name = string("op_2656_cast_fp16")]; + tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_2650_cast_fp16)[name = string("transpose_27")]; + tensor var_2660_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2660_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2671_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_2671_cast_fp16")]; + bool var_2673_interleave_0 = const()[name = string("op_2673_interleave_0"), val = bool(false)]; + tensor var_2673_cast_fp16 = concat(axis = var_69, interleave = var_2673_interleave_0, values = (var_2671_cast_fp16, x1_69_cast_fp16))[name = string("op_2673_cast_fp16")]; + tensor var_2674_cast_fp16 = mul(x = var_2673_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2674_cast_fp16")]; + tensor query_states_71_cast_fp16 = add(x = var_2660_cast_fp16, y = var_2674_cast_fp16)[name = string("query_states_71_cast_fp16")]; + tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_2653_cast_fp16)[name = string("transpose_26")]; + tensor var_2676_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2676_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2687_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2687_cast_fp16")]; + bool var_2689_interleave_0 = const()[name = string("op_2689_interleave_0"), val = bool(false)]; + tensor var_2689_cast_fp16 = concat(axis = var_69, interleave = var_2689_interleave_0, values = (var_2687_cast_fp16, x1_71_cast_fp16))[name = string("op_2689_cast_fp16")]; + tensor var_2690_cast_fp16 = mul(x = var_2689_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2690_cast_fp16")]; + tensor k_state_35_cast_fp16 = add(x = var_2676_cast_fp16, y = var_2690_cast_fp16)[name = string("k_state_35_cast_fp16")]; + tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; + tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; + tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; + tensor concat_260_values0_0 = const()[name = string("concat_260_values0_0"), val = tensor([17])]; + int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; + bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; + tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (concat_260_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_260")]; + tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_260, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; + tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_35_cast_fp16 = transpose(perm = v_state_35_perm_0, x = var_2656_cast_fp16)[name = string("transpose_25")]; + tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_260, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; + tensor var_2713_begin_0 = const()[name = string("op_2713_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_2713_end_0 = const()[name = string("op_2713_end_0"), val = tensor([18, 1, 32, 2048, 64])]; + tensor var_2713_end_mask_0 = const()[name = string("op_2713_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2713_squeeze_mask_0 = const()[name = string("op_2713_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2713_cast_fp16 = slice_by_index(begin = var_2713_begin_0, end = var_2713_end_0, end_mask = var_2713_end_mask_0, squeeze_mask = var_2713_squeeze_mask_0, x = coreml_update_state_82)[name = string("op_2713_cast_fp16")]; + tensor var_2716_begin_0 = const()[name = string("op_2716_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2716_end_mask_0 = const()[name = string("op_2716_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2716_cast_fp16 = slice_by_index(begin = var_2716_begin_0, end = concat_11, end_mask = var_2716_end_mask_0, x = var_2713_cast_fp16)[name = string("op_2716_cast_fp16")]; + tensor var_2718_begin_0 = const()[name = string("op_2718_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_2718_end_0 = const()[name = string("op_2718_end_0"), val = tensor([18, 1, 32, 2048, 64])]; + tensor var_2718_end_mask_0 = const()[name = string("op_2718_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2718_squeeze_mask_0 = const()[name = string("op_2718_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2718_cast_fp16 = slice_by_index(begin = var_2718_begin_0, end = var_2718_end_0, end_mask = var_2718_end_mask_0, squeeze_mask = var_2718_squeeze_mask_0, x = coreml_update_state_83)[name = string("op_2718_cast_fp16")]; + tensor var_2721_begin_0 = const()[name = string("op_2721_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2721_end_mask_0 = const()[name = string("op_2721_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2721_cast_fp16 = slice_by_index(begin = var_2721_begin_0, end = concat_11, end_mask = var_2721_end_mask_0, x = var_2718_cast_fp16)[name = string("op_2721_cast_fp16")]; + tensor var_2723_shape_cast_fp16 = shape(x = var_2716_cast_fp16)[name = string("op_2723_shape_cast_fp16")]; + int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; + int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; + bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; + string var_2723_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2723_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; + tensor var_2723_shape_cast_fp16_to_uint16 = cast(dtype = var_2723_shape_cast_fp16_to_uint16_dtype_0, x = var_2723_shape_cast_fp16)[name = string("cast_26")]; + uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_2723_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; + string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = int32(1)]; + int32 concat_268_values1_0 = const()[name = string("concat_268_values1_0"), val = int32(1)]; + int32 concat_268_values2_0 = const()[name = string("concat_268_values2_0"), val = int32(0)]; + int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; + bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; + int32 gather_183_cast_uint16_to_int32 = cast(dtype = gather_183_cast_uint16_to_int32_dtype_0, x = gather_183_cast_uint16)[name = string("cast_25")]; + tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, concat_268_values1_0, concat_268_values2_0, gather_183_cast_uint16_to_int32))[name = string("concat_268")]; + tensor causal_mask_37_begin_0 = const()[name = string("causal_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_37_end_mask_0 = const()[name = string("causal_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_37_cast_fp16 = slice_by_index(begin = causal_mask_37_begin_0, end = concat_268, end_mask = causal_mask_37_end_mask_0, x = causal_mask)[name = string("causal_mask_37_cast_fp16")]; + tensor attn_output_69_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_37_cast_fp16, key = var_2716_cast_fp16, query = query_states_71_cast_fp16, value = var_2721_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_2729_perm_0 = const()[name = string("op_2729_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; + bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; + int32 gather_175_cast_uint16_to_int32 = cast(dtype = gather_175_cast_uint16_to_int32_dtype_0, x = gather_175_cast_uint16)[name = string("cast_24")]; + tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (gather_174, gather_175_cast_uint16_to_int32, var_69))[name = string("concat_269")]; + tensor var_2729_cast_fp16 = transpose(perm = var_2729_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_24")]; + tensor input_137_cast_fp16 = reshape(shape = concat_269, x = var_2729_cast_fp16)[name = string("input_137_cast_fp16")]; + tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705611648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707708864))))[name = string("model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_122_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor hidden_states_457_cast_fp16 = add(x = hidden_states_441_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; + fp16 var_64_promoted_35_to_fp16 = const()[name = string("op_64_promoted_35_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2738_cast_fp16 = pow(x = hidden_states_457_cast_fp16, y = var_64_promoted_35_to_fp16)[name = string("op_2738_cast_fp16")]; + tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; + bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; + tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_2738_cast_fp16)[name = string("variance_71_cast_fp16")]; + fp16 var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2742_cast_fp16 = add(x = variance_71_cast_fp16, y = var_2741_to_fp16)[name = string("op_2742_cast_fp16")]; + fp32 var_2743_epsilon_0 = const()[name = string("op_2743_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2743_cast_fp16 = rsqrt(epsilon = var_2743_epsilon_0, x = var_2742_cast_fp16)[name = string("op_2743_cast_fp16")]; + tensor hidden_states_461_cast_fp16 = mul(x = hidden_states_457_cast_fp16, y = var_2743_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; + tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707971072)))]; + tensor input_139_cast_fp16 = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_461_cast_fp16)[name = string("input_139_cast_fp16")]; + tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707975232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716363904))))[name = string("model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor var_2755_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_2755_cast_fp16")]; + tensor model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(717412544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725801216))))[name = string("model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor input_143_cast_fp16 = mul(x = var_2755_cast_fp16, y = linear_124_cast_fp16)[name = string("input_143_cast_fp16")]; + tensor model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726849856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735238528))))[name = string("model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_125_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor hidden_states_467_cast_fp16 = add(x = hidden_states_457_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_467_cast_fp16")]; + fp16 var_64_promoted_36_to_fp16 = const()[name = string("op_64_promoted_36_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2768_cast_fp16 = pow(x = hidden_states_467_cast_fp16, y = var_64_promoted_36_to_fp16)[name = string("op_2768_cast_fp16")]; + tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; + bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; + tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_2768_cast_fp16)[name = string("variance_73_cast_fp16")]; + fp16 var_2771_to_fp16 = const()[name = string("op_2771_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2772_cast_fp16 = add(x = variance_73_cast_fp16, y = var_2771_to_fp16)[name = string("op_2772_cast_fp16")]; + fp32 var_2773_epsilon_0 = const()[name = string("op_2773_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2773_cast_fp16 = rsqrt(epsilon = var_2773_epsilon_0, x = var_2772_cast_fp16)[name = string("op_2773_cast_fp16")]; + tensor hidden_states_471_cast_fp16 = mul(x = hidden_states_467_cast_fp16, y = var_2773_cast_fp16)[name = string("hidden_states_471_cast_fp16")]; + tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736287168)))]; + tensor hidden_states_475_cast_fp16 = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_471_cast_fp16)[name = string("hidden_states_475_cast_fp16")]; + tensor var_2784_shape_cast_fp16 = shape(x = hidden_states_475_cast_fp16)[name = string("op_2784_shape_cast_fp16")]; + int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; + int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; + int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; + bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; + string var_2784_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2784_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_185_to_uint16 = const()[name = string("select_185_to_uint16"), val = uint16(1)]; + tensor var_2784_shape_cast_fp16_to_uint16 = cast(dtype = var_2784_shape_cast_fp16_to_uint16_dtype_0, x = var_2784_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_2784_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; + string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736291328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738388544))))[name = string("model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_475_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738650752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740747968))))[name = string("model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_127_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_475_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741010176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743107392))))[name = string("model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_128_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_475_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor concat_270x = const()[name = string("concat_270x"), val = tensor([1, -1, 32, 64])]; + tensor var_2793_cast_fp16 = reshape(shape = concat_270x, x = linear_126_cast_fp16)[name = string("op_2793_cast_fp16")]; + tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_271x = const()[name = string("concat_271x"), val = tensor([1, -1, 32, 64])]; + tensor var_2796_cast_fp16 = reshape(shape = concat_271x, x = linear_127_cast_fp16)[name = string("op_2796_cast_fp16")]; + tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_272x = const()[name = string("concat_272x"), val = tensor([1, -1, 32, 64])]; + tensor var_2799_cast_fp16 = reshape(shape = concat_272x, x = linear_128_cast_fp16)[name = string("op_2799_cast_fp16")]; + tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_2793_cast_fp16)[name = string("transpose_23")]; + tensor var_2803_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2803_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2814_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_2814_cast_fp16")]; + bool var_2816_interleave_0 = const()[name = string("op_2816_interleave_0"), val = bool(false)]; + tensor var_2816_cast_fp16 = concat(axis = var_69, interleave = var_2816_interleave_0, values = (var_2814_cast_fp16, x1_73_cast_fp16))[name = string("op_2816_cast_fp16")]; + tensor var_2817_cast_fp16 = mul(x = var_2816_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2817_cast_fp16")]; + tensor query_states_75_cast_fp16 = add(x = var_2803_cast_fp16, y = var_2817_cast_fp16)[name = string("query_states_75_cast_fp16")]; + tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_2796_cast_fp16)[name = string("transpose_22")]; + tensor var_2819_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2819_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2830_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2830_cast_fp16")]; + bool var_2832_interleave_0 = const()[name = string("op_2832_interleave_0"), val = bool(false)]; + tensor var_2832_cast_fp16 = concat(axis = var_69, interleave = var_2832_interleave_0, values = (var_2830_cast_fp16, x1_75_cast_fp16))[name = string("op_2832_cast_fp16")]; + tensor var_2833_cast_fp16 = mul(x = var_2832_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2833_cast_fp16")]; + tensor k_state_37_cast_fp16 = add(x = var_2819_cast_fp16, y = var_2833_cast_fp16)[name = string("k_state_37_cast_fp16")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor concat_275_values0_0 = const()[name = string("concat_275_values0_0"), val = tensor([18])]; + int32 concat_275_axis_0 = const()[name = string("concat_275_axis_0"), val = int32(0)]; + bool concat_275_interleave_0 = const()[name = string("concat_275_interleave_0"), val = bool(false)]; + tensor concat_275 = concat(axis = concat_275_axis_0, interleave = concat_275_interleave_0, values = (concat_275_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_275")]; + tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_275, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; + tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_37_cast_fp16 = transpose(perm = v_state_37_perm_0, x = var_2799_cast_fp16)[name = string("transpose_21")]; + tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_275, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; + tensor var_2856_begin_0 = const()[name = string("op_2856_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_2856_end_0 = const()[name = string("op_2856_end_0"), val = tensor([19, 1, 32, 2048, 64])]; + tensor var_2856_end_mask_0 = const()[name = string("op_2856_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2856_squeeze_mask_0 = const()[name = string("op_2856_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2856_cast_fp16 = slice_by_index(begin = var_2856_begin_0, end = var_2856_end_0, end_mask = var_2856_end_mask_0, squeeze_mask = var_2856_squeeze_mask_0, x = coreml_update_state_84)[name = string("op_2856_cast_fp16")]; + tensor var_2859_begin_0 = const()[name = string("op_2859_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2859_end_mask_0 = const()[name = string("op_2859_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2859_cast_fp16 = slice_by_index(begin = var_2859_begin_0, end = concat_11, end_mask = var_2859_end_mask_0, x = var_2856_cast_fp16)[name = string("op_2859_cast_fp16")]; + tensor var_2861_begin_0 = const()[name = string("op_2861_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_2861_end_0 = const()[name = string("op_2861_end_0"), val = tensor([19, 1, 32, 2048, 64])]; + tensor var_2861_end_mask_0 = const()[name = string("op_2861_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2861_squeeze_mask_0 = const()[name = string("op_2861_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2861_cast_fp16 = slice_by_index(begin = var_2861_begin_0, end = var_2861_end_0, end_mask = var_2861_end_mask_0, squeeze_mask = var_2861_squeeze_mask_0, x = coreml_update_state_85)[name = string("op_2861_cast_fp16")]; + tensor var_2864_begin_0 = const()[name = string("op_2864_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2864_end_mask_0 = const()[name = string("op_2864_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2864_cast_fp16 = slice_by_index(begin = var_2864_begin_0, end = concat_11, end_mask = var_2864_end_mask_0, x = var_2861_cast_fp16)[name = string("op_2864_cast_fp16")]; + tensor var_2866_shape_cast_fp16 = shape(x = var_2859_cast_fp16)[name = string("op_2866_shape_cast_fp16")]; + int32 gather_193_axis_0 = const()[name = string("gather_193_axis_0"), val = int32(0)]; + int32 gather_193_batch_dims_0 = const()[name = string("gather_193_batch_dims_0"), val = int32(0)]; + bool gather_193_validate_indices_0 = const()[name = string("gather_193_validate_indices_0"), val = bool(false)]; + string var_2866_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2866_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_193_to_uint16 = const()[name = string("select_193_to_uint16"), val = uint16(2)]; + tensor var_2866_shape_cast_fp16_to_uint16 = cast(dtype = var_2866_shape_cast_fp16_to_uint16_dtype_0, x = var_2866_shape_cast_fp16)[name = string("cast_22")]; + uint16 gather_193_cast_uint16 = gather(axis = gather_193_axis_0, batch_dims = gather_193_batch_dims_0, indices = select_193_to_uint16, validate_indices = gather_193_validate_indices_0, x = var_2866_shape_cast_fp16_to_uint16)[name = string("gather_193_cast_uint16")]; + string gather_193_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_193_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; + int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; + int32 concat_283_values2_0 = const()[name = string("concat_283_values2_0"), val = int32(0)]; + int32 concat_283_axis_0 = const()[name = string("concat_283_axis_0"), val = int32(0)]; + bool concat_283_interleave_0 = const()[name = string("concat_283_interleave_0"), val = bool(false)]; + int32 gather_193_cast_uint16_to_int32 = cast(dtype = gather_193_cast_uint16_to_int32_dtype_0, x = gather_193_cast_uint16)[name = string("cast_21")]; + tensor concat_283 = concat(axis = concat_283_axis_0, interleave = concat_283_interleave_0, values = (concat_283_values0_0, concat_283_values1_0, concat_283_values2_0, gather_193_cast_uint16_to_int32))[name = string("concat_283")]; + tensor causal_mask_39_begin_0 = const()[name = string("causal_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_39_end_mask_0 = const()[name = string("causal_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_39_cast_fp16 = slice_by_index(begin = causal_mask_39_begin_0, end = concat_283, end_mask = causal_mask_39_end_mask_0, x = causal_mask)[name = string("causal_mask_39_cast_fp16")]; + tensor attn_output_73_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_39_cast_fp16, key = var_2859_cast_fp16, query = query_states_75_cast_fp16, value = var_2864_cast_fp16)[name = string("attn_output_73_cast_fp16")]; + tensor var_2872_perm_0 = const()[name = string("op_2872_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_284_axis_0 = const()[name = string("concat_284_axis_0"), val = int32(0)]; + bool concat_284_interleave_0 = const()[name = string("concat_284_interleave_0"), val = bool(false)]; + int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_20")]; + tensor concat_284 = concat(axis = concat_284_axis_0, interleave = concat_284_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_69))[name = string("concat_284")]; + tensor var_2872_cast_fp16 = transpose(perm = var_2872_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_20")]; + tensor input_145_cast_fp16 = reshape(shape = concat_284, x = var_2872_cast_fp16)[name = string("input_145_cast_fp16")]; + tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743369600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745466816))))[name = string("model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_129_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor hidden_states_483_cast_fp16 = add(x = hidden_states_467_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; + fp16 var_64_promoted_37_to_fp16 = const()[name = string("op_64_promoted_37_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2881_cast_fp16 = pow(x = hidden_states_483_cast_fp16, y = var_64_promoted_37_to_fp16)[name = string("op_2881_cast_fp16")]; + tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; + bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; + tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_2881_cast_fp16)[name = string("variance_75_cast_fp16")]; + fp16 var_2884_to_fp16 = const()[name = string("op_2884_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2885_cast_fp16 = add(x = variance_75_cast_fp16, y = var_2884_to_fp16)[name = string("op_2885_cast_fp16")]; + fp32 var_2886_epsilon_0 = const()[name = string("op_2886_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2886_cast_fp16 = rsqrt(epsilon = var_2886_epsilon_0, x = var_2885_cast_fp16)[name = string("op_2886_cast_fp16")]; + tensor hidden_states_487_cast_fp16 = mul(x = hidden_states_483_cast_fp16, y = var_2886_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; + tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745729024)))]; + tensor input_147_cast_fp16 = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_487_cast_fp16)[name = string("input_147_cast_fp16")]; + tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745733184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754121856))))[name = string("model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_2898_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_2898_cast_fp16")]; + tensor model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755170496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763559168))))[name = string("model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = var_2898_cast_fp16, y = linear_131_cast_fp16)[name = string("input_151_cast_fp16")]; + tensor model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764607808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(772996480))))[name = string("model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_132_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor hidden_states_493_cast_fp16 = add(x = hidden_states_483_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_493_cast_fp16")]; + fp16 var_64_promoted_38_to_fp16 = const()[name = string("op_64_promoted_38_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2911_cast_fp16 = pow(x = hidden_states_493_cast_fp16, y = var_64_promoted_38_to_fp16)[name = string("op_2911_cast_fp16")]; + tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; + bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; + tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_2911_cast_fp16)[name = string("variance_77_cast_fp16")]; + fp16 var_2914_to_fp16 = const()[name = string("op_2914_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2915_cast_fp16 = add(x = variance_77_cast_fp16, y = var_2914_to_fp16)[name = string("op_2915_cast_fp16")]; + fp32 var_2916_epsilon_0 = const()[name = string("op_2916_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2916_cast_fp16 = rsqrt(epsilon = var_2916_epsilon_0, x = var_2915_cast_fp16)[name = string("op_2916_cast_fp16")]; + tensor hidden_states_497_cast_fp16 = mul(x = hidden_states_493_cast_fp16, y = var_2916_cast_fp16)[name = string("hidden_states_497_cast_fp16")]; + tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774045120)))]; + tensor hidden_states_501_cast_fp16 = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_497_cast_fp16)[name = string("hidden_states_501_cast_fp16")]; + tensor var_2927_shape_cast_fp16 = shape(x = hidden_states_501_cast_fp16)[name = string("op_2927_shape_cast_fp16")]; + int32 gather_194 = const()[name = string("gather_194"), val = int32(1)]; + int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; + int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; + bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; + string var_2927_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2927_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(1)]; + tensor var_2927_shape_cast_fp16_to_uint16 = cast(dtype = var_2927_shape_cast_fp16_to_uint16_dtype_0, x = var_2927_shape_cast_fp16)[name = string("cast_19")]; + uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2927_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; + string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774049280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776146496))))[name = string("model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_501_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776408704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778505920))))[name = string("model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_134_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_501_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778768128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(780865344))))[name = string("model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_135_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_501_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 32, 64])]; + tensor var_2936_cast_fp16 = reshape(shape = concat_285x, x = linear_133_cast_fp16)[name = string("op_2936_cast_fp16")]; + tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 32, 64])]; + tensor var_2939_cast_fp16 = reshape(shape = concat_286x, x = linear_134_cast_fp16)[name = string("op_2939_cast_fp16")]; + tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 32, 64])]; + tensor var_2942_cast_fp16 = reshape(shape = concat_287x, x = linear_135_cast_fp16)[name = string("op_2942_cast_fp16")]; + tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_2936_cast_fp16)[name = string("transpose_19")]; + tensor var_2946_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2946_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2957_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_2957_cast_fp16")]; + bool var_2959_interleave_0 = const()[name = string("op_2959_interleave_0"), val = bool(false)]; + tensor var_2959_cast_fp16 = concat(axis = var_69, interleave = var_2959_interleave_0, values = (var_2957_cast_fp16, x1_77_cast_fp16))[name = string("op_2959_cast_fp16")]; + tensor var_2960_cast_fp16 = mul(x = var_2959_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2960_cast_fp16")]; + tensor query_states_79_cast_fp16 = add(x = var_2946_cast_fp16, y = var_2960_cast_fp16)[name = string("query_states_79_cast_fp16")]; + tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_2939_cast_fp16)[name = string("transpose_18")]; + tensor var_2962_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2962_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2973_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_2973_cast_fp16")]; + bool var_2975_interleave_0 = const()[name = string("op_2975_interleave_0"), val = bool(false)]; + tensor var_2975_cast_fp16 = concat(axis = var_69, interleave = var_2975_interleave_0, values = (var_2973_cast_fp16, x1_79_cast_fp16))[name = string("op_2975_cast_fp16")]; + tensor var_2976_cast_fp16 = mul(x = var_2975_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2976_cast_fp16")]; + tensor k_state_39_cast_fp16 = add(x = var_2962_cast_fp16, y = var_2976_cast_fp16)[name = string("k_state_39_cast_fp16")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([19])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_290")]; + tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; + tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_39_cast_fp16 = transpose(perm = v_state_39_perm_0, x = var_2942_cast_fp16)[name = string("transpose_17")]; + tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; + tensor var_2999_begin_0 = const()[name = string("op_2999_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_2999_end_0 = const()[name = string("op_2999_end_0"), val = tensor([20, 1, 32, 2048, 64])]; + tensor var_2999_end_mask_0 = const()[name = string("op_2999_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2999_squeeze_mask_0 = const()[name = string("op_2999_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2999_cast_fp16 = slice_by_index(begin = var_2999_begin_0, end = var_2999_end_0, end_mask = var_2999_end_mask_0, squeeze_mask = var_2999_squeeze_mask_0, x = coreml_update_state_86)[name = string("op_2999_cast_fp16")]; + tensor var_3002_begin_0 = const()[name = string("op_3002_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3002_end_mask_0 = const()[name = string("op_3002_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3002_cast_fp16 = slice_by_index(begin = var_3002_begin_0, end = concat_11, end_mask = var_3002_end_mask_0, x = var_2999_cast_fp16)[name = string("op_3002_cast_fp16")]; + tensor var_3004_begin_0 = const()[name = string("op_3004_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_3004_end_0 = const()[name = string("op_3004_end_0"), val = tensor([20, 1, 32, 2048, 64])]; + tensor var_3004_end_mask_0 = const()[name = string("op_3004_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3004_squeeze_mask_0 = const()[name = string("op_3004_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3004_cast_fp16 = slice_by_index(begin = var_3004_begin_0, end = var_3004_end_0, end_mask = var_3004_end_mask_0, squeeze_mask = var_3004_squeeze_mask_0, x = coreml_update_state_87)[name = string("op_3004_cast_fp16")]; + tensor var_3007_begin_0 = const()[name = string("op_3007_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3007_end_mask_0 = const()[name = string("op_3007_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3007_cast_fp16 = slice_by_index(begin = var_3007_begin_0, end = concat_11, end_mask = var_3007_end_mask_0, x = var_3004_cast_fp16)[name = string("op_3007_cast_fp16")]; + tensor var_3009_shape_cast_fp16 = shape(x = var_3002_cast_fp16)[name = string("op_3009_shape_cast_fp16")]; + int32 gather_203_axis_0 = const()[name = string("gather_203_axis_0"), val = int32(0)]; + int32 gather_203_batch_dims_0 = const()[name = string("gather_203_batch_dims_0"), val = int32(0)]; + bool gather_203_validate_indices_0 = const()[name = string("gather_203_validate_indices_0"), val = bool(false)]; + string var_3009_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3009_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_203_to_uint16 = const()[name = string("select_203_to_uint16"), val = uint16(2)]; + tensor var_3009_shape_cast_fp16_to_uint16 = cast(dtype = var_3009_shape_cast_fp16_to_uint16_dtype_0, x = var_3009_shape_cast_fp16)[name = string("cast_18")]; + uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_3009_shape_cast_fp16_to_uint16)[name = string("gather_203_cast_uint16")]; + string gather_203_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_203_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_298_values0_0 = const()[name = string("concat_298_values0_0"), val = int32(1)]; + int32 concat_298_values1_0 = const()[name = string("concat_298_values1_0"), val = int32(1)]; + int32 concat_298_values2_0 = const()[name = string("concat_298_values2_0"), val = int32(0)]; + int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; + bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; + int32 gather_203_cast_uint16_to_int32 = cast(dtype = gather_203_cast_uint16_to_int32_dtype_0, x = gather_203_cast_uint16)[name = string("cast_17")]; + tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (concat_298_values0_0, concat_298_values1_0, concat_298_values2_0, gather_203_cast_uint16_to_int32))[name = string("concat_298")]; + tensor causal_mask_41_begin_0 = const()[name = string("causal_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_41_end_mask_0 = const()[name = string("causal_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_41_cast_fp16 = slice_by_index(begin = causal_mask_41_begin_0, end = concat_298, end_mask = causal_mask_41_end_mask_0, x = causal_mask)[name = string("causal_mask_41_cast_fp16")]; + tensor attn_output_77_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_41_cast_fp16, key = var_3002_cast_fp16, query = query_states_79_cast_fp16, value = var_3007_cast_fp16)[name = string("attn_output_77_cast_fp16")]; + tensor var_3015_perm_0 = const()[name = string("op_3015_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_299_axis_0 = const()[name = string("concat_299_axis_0"), val = int32(0)]; + bool concat_299_interleave_0 = const()[name = string("concat_299_interleave_0"), val = bool(false)]; + int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_16")]; + tensor concat_299 = concat(axis = concat_299_axis_0, interleave = concat_299_interleave_0, values = (gather_194, gather_195_cast_uint16_to_int32, var_69))[name = string("concat_299")]; + tensor var_3015_cast_fp16 = transpose(perm = var_3015_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_16")]; + tensor input_153_cast_fp16 = reshape(shape = concat_299, x = var_3015_cast_fp16)[name = string("input_153_cast_fp16")]; + tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781127552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783224768))))[name = string("model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_136_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor hidden_states_509_cast_fp16 = add(x = hidden_states_493_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; + fp16 var_64_promoted_39_to_fp16 = const()[name = string("op_64_promoted_39_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3024_cast_fp16 = pow(x = hidden_states_509_cast_fp16, y = var_64_promoted_39_to_fp16)[name = string("op_3024_cast_fp16")]; + tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; + bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; + tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_3024_cast_fp16)[name = string("variance_79_cast_fp16")]; + fp16 var_3027_to_fp16 = const()[name = string("op_3027_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3028_cast_fp16 = add(x = variance_79_cast_fp16, y = var_3027_to_fp16)[name = string("op_3028_cast_fp16")]; + fp32 var_3029_epsilon_0 = const()[name = string("op_3029_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3029_cast_fp16 = rsqrt(epsilon = var_3029_epsilon_0, x = var_3028_cast_fp16)[name = string("op_3029_cast_fp16")]; + tensor hidden_states_513_cast_fp16 = mul(x = hidden_states_509_cast_fp16, y = var_3029_cast_fp16)[name = string("hidden_states_513_cast_fp16")]; + tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783486976)))]; + tensor input_155_cast_fp16 = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_513_cast_fp16)[name = string("input_155_cast_fp16")]; + tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783491136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791879808))))[name = string("model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3041_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3041_cast_fp16")]; + tensor model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792928448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801317120))))[name = string("model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor input_159_cast_fp16 = mul(x = var_3041_cast_fp16, y = linear_138_cast_fp16)[name = string("input_159_cast_fp16")]; + tensor model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802365760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810754432))))[name = string("model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_139_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor hidden_states_519_cast_fp16 = add(x = hidden_states_509_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_519_cast_fp16")]; + fp16 var_64_promoted_40_to_fp16 = const()[name = string("op_64_promoted_40_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3054_cast_fp16 = pow(x = hidden_states_519_cast_fp16, y = var_64_promoted_40_to_fp16)[name = string("op_3054_cast_fp16")]; + tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; + bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; + tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_3054_cast_fp16)[name = string("variance_81_cast_fp16")]; + fp16 var_3057_to_fp16 = const()[name = string("op_3057_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3058_cast_fp16 = add(x = variance_81_cast_fp16, y = var_3057_to_fp16)[name = string("op_3058_cast_fp16")]; + fp32 var_3059_epsilon_0 = const()[name = string("op_3059_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3059_cast_fp16 = rsqrt(epsilon = var_3059_epsilon_0, x = var_3058_cast_fp16)[name = string("op_3059_cast_fp16")]; + tensor hidden_states_523_cast_fp16 = mul(x = hidden_states_519_cast_fp16, y = var_3059_cast_fp16)[name = string("hidden_states_523_cast_fp16")]; + tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(811803072)))]; + tensor hidden_states_527_cast_fp16 = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_523_cast_fp16)[name = string("hidden_states_527_cast_fp16")]; + tensor var_3070_shape_cast_fp16 = shape(x = hidden_states_527_cast_fp16)[name = string("op_3070_shape_cast_fp16")]; + int32 gather_204 = const()[name = string("gather_204"), val = int32(1)]; + int32 gather_205_axis_0 = const()[name = string("gather_205_axis_0"), val = int32(0)]; + int32 gather_205_batch_dims_0 = const()[name = string("gather_205_batch_dims_0"), val = int32(0)]; + bool gather_205_validate_indices_0 = const()[name = string("gather_205_validate_indices_0"), val = bool(false)]; + string var_3070_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3070_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_205_to_uint16 = const()[name = string("select_205_to_uint16"), val = uint16(1)]; + tensor var_3070_shape_cast_fp16_to_uint16 = cast(dtype = var_3070_shape_cast_fp16_to_uint16_dtype_0, x = var_3070_shape_cast_fp16)[name = string("cast_15")]; + uint16 gather_205_cast_uint16 = gather(axis = gather_205_axis_0, batch_dims = gather_205_batch_dims_0, indices = select_205_to_uint16, validate_indices = gather_205_validate_indices_0, x = var_3070_shape_cast_fp16_to_uint16)[name = string("gather_205_cast_uint16")]; + string gather_205_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_205_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(811807232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813904448))))[name = string("model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_527_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814166656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816263872))))[name = string("model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_141_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_527_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816526080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818623296))))[name = string("model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_142_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_527_cast_fp16)[name = string("linear_142_cast_fp16")]; + tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 32, 64])]; + tensor var_3079_cast_fp16 = reshape(shape = concat_300x, x = linear_140_cast_fp16)[name = string("op_3079_cast_fp16")]; + tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, -1, 32, 64])]; + tensor var_3082_cast_fp16 = reshape(shape = concat_301x, x = linear_141_cast_fp16)[name = string("op_3082_cast_fp16")]; + tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_302x = const()[name = string("concat_302x"), val = tensor([1, -1, 32, 64])]; + tensor var_3085_cast_fp16 = reshape(shape = concat_302x, x = linear_142_cast_fp16)[name = string("op_3085_cast_fp16")]; + tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_3079_cast_fp16)[name = string("transpose_15")]; + tensor var_3089_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3089_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3100_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3100_cast_fp16")]; + bool var_3102_interleave_0 = const()[name = string("op_3102_interleave_0"), val = bool(false)]; + tensor var_3102_cast_fp16 = concat(axis = var_69, interleave = var_3102_interleave_0, values = (var_3100_cast_fp16, x1_81_cast_fp16))[name = string("op_3102_cast_fp16")]; + tensor var_3103_cast_fp16 = mul(x = var_3102_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3103_cast_fp16")]; + tensor query_states_83_cast_fp16 = add(x = var_3089_cast_fp16, y = var_3103_cast_fp16)[name = string("query_states_83_cast_fp16")]; + tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_3082_cast_fp16)[name = string("transpose_14")]; + tensor var_3105_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3105_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3116_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3116_cast_fp16")]; + bool var_3118_interleave_0 = const()[name = string("op_3118_interleave_0"), val = bool(false)]; + tensor var_3118_cast_fp16 = concat(axis = var_69, interleave = var_3118_interleave_0, values = (var_3116_cast_fp16, x1_83_cast_fp16))[name = string("op_3118_cast_fp16")]; + tensor var_3119_cast_fp16 = mul(x = var_3118_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3119_cast_fp16")]; + tensor k_state_41_cast_fp16 = add(x = var_3105_cast_fp16, y = var_3119_cast_fp16)[name = string("k_state_41_cast_fp16")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor concat_305_values0_0 = const()[name = string("concat_305_values0_0"), val = tensor([20])]; + int32 concat_305_axis_0 = const()[name = string("concat_305_axis_0"), val = int32(0)]; + bool concat_305_interleave_0 = const()[name = string("concat_305_interleave_0"), val = bool(false)]; + tensor concat_305 = concat(axis = concat_305_axis_0, interleave = concat_305_interleave_0, values = (concat_305_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_305")]; + tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_305, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; + tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_41_cast_fp16 = transpose(perm = v_state_41_perm_0, x = var_3085_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_305, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; + tensor var_3142_begin_0 = const()[name = string("op_3142_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3142_end_0 = const()[name = string("op_3142_end_0"), val = tensor([21, 1, 32, 2048, 64])]; + tensor var_3142_end_mask_0 = const()[name = string("op_3142_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3142_squeeze_mask_0 = const()[name = string("op_3142_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3142_cast_fp16 = slice_by_index(begin = var_3142_begin_0, end = var_3142_end_0, end_mask = var_3142_end_mask_0, squeeze_mask = var_3142_squeeze_mask_0, x = coreml_update_state_88)[name = string("op_3142_cast_fp16")]; + tensor var_3145_begin_0 = const()[name = string("op_3145_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3145_end_mask_0 = const()[name = string("op_3145_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3145_cast_fp16 = slice_by_index(begin = var_3145_begin_0, end = concat_11, end_mask = var_3145_end_mask_0, x = var_3142_cast_fp16)[name = string("op_3145_cast_fp16")]; + tensor var_3147_begin_0 = const()[name = string("op_3147_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3147_end_0 = const()[name = string("op_3147_end_0"), val = tensor([21, 1, 32, 2048, 64])]; + tensor var_3147_end_mask_0 = const()[name = string("op_3147_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3147_squeeze_mask_0 = const()[name = string("op_3147_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3147_cast_fp16 = slice_by_index(begin = var_3147_begin_0, end = var_3147_end_0, end_mask = var_3147_end_mask_0, squeeze_mask = var_3147_squeeze_mask_0, x = coreml_update_state_89)[name = string("op_3147_cast_fp16")]; + tensor var_3150_begin_0 = const()[name = string("op_3150_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3150_end_mask_0 = const()[name = string("op_3150_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3150_cast_fp16 = slice_by_index(begin = var_3150_begin_0, end = concat_11, end_mask = var_3150_end_mask_0, x = var_3147_cast_fp16)[name = string("op_3150_cast_fp16")]; + tensor var_3152_shape_cast_fp16 = shape(x = var_3145_cast_fp16)[name = string("op_3152_shape_cast_fp16")]; + int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; + int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; + bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; + string var_3152_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3152_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; + tensor var_3152_shape_cast_fp16_to_uint16 = cast(dtype = var_3152_shape_cast_fp16_to_uint16_dtype_0, x = var_3152_shape_cast_fp16)[name = string("cast_14")]; + uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_3152_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; + string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = int32(1)]; + int32 concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = int32(1)]; + int32 concat_313_values2_0 = const()[name = string("concat_313_values2_0"), val = int32(0)]; + int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; + bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; + int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_13")]; + tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, concat_313_values2_0, gather_213_cast_uint16_to_int32))[name = string("concat_313")]; + tensor causal_mask_43_begin_0 = const()[name = string("causal_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_43_end_mask_0 = const()[name = string("causal_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_43_cast_fp16 = slice_by_index(begin = causal_mask_43_begin_0, end = concat_313, end_mask = causal_mask_43_end_mask_0, x = causal_mask)[name = string("causal_mask_43_cast_fp16")]; + tensor attn_output_81_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_43_cast_fp16, key = var_3145_cast_fp16, query = query_states_83_cast_fp16, value = var_3150_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_3158_perm_0 = const()[name = string("op_3158_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_314_axis_0 = const()[name = string("concat_314_axis_0"), val = int32(0)]; + bool concat_314_interleave_0 = const()[name = string("concat_314_interleave_0"), val = bool(false)]; + int32 gather_205_cast_uint16_to_int32 = cast(dtype = gather_205_cast_uint16_to_int32_dtype_0, x = gather_205_cast_uint16)[name = string("cast_12")]; + tensor concat_314 = concat(axis = concat_314_axis_0, interleave = concat_314_interleave_0, values = (gather_204, gather_205_cast_uint16_to_int32, var_69))[name = string("concat_314")]; + tensor var_3158_cast_fp16 = transpose(perm = var_3158_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_12")]; + tensor input_161_cast_fp16 = reshape(shape = concat_314, x = var_3158_cast_fp16)[name = string("input_161_cast_fp16")]; + tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818885504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820982720))))[name = string("model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_143_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor hidden_states_535_cast_fp16 = add(x = hidden_states_519_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_535_cast_fp16")]; + fp16 var_64_promoted_41_to_fp16 = const()[name = string("op_64_promoted_41_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3167_cast_fp16 = pow(x = hidden_states_535_cast_fp16, y = var_64_promoted_41_to_fp16)[name = string("op_3167_cast_fp16")]; + tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; + bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; + tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_3167_cast_fp16)[name = string("variance_83_cast_fp16")]; + fp16 var_3170_to_fp16 = const()[name = string("op_3170_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3171_cast_fp16 = add(x = variance_83_cast_fp16, y = var_3170_to_fp16)[name = string("op_3171_cast_fp16")]; + fp32 var_3172_epsilon_0 = const()[name = string("op_3172_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3172_cast_fp16 = rsqrt(epsilon = var_3172_epsilon_0, x = var_3171_cast_fp16)[name = string("op_3172_cast_fp16")]; + tensor hidden_states_539_cast_fp16 = mul(x = hidden_states_535_cast_fp16, y = var_3172_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; + tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821244928)))]; + tensor input_163_cast_fp16 = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_539_cast_fp16)[name = string("input_163_cast_fp16")]; + tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821249088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829637760))))[name = string("model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_3184_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_3184_cast_fp16")]; + tensor model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830686400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839075072))))[name = string("model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor input_167_cast_fp16 = mul(x = var_3184_cast_fp16, y = linear_145_cast_fp16)[name = string("input_167_cast_fp16")]; + tensor model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840123712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(848512384))))[name = string("model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_146_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor hidden_states_545_cast_fp16 = add(x = hidden_states_535_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_545_cast_fp16")]; + fp16 var_64_promoted_42_to_fp16 = const()[name = string("op_64_promoted_42_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3197_cast_fp16 = pow(x = hidden_states_545_cast_fp16, y = var_64_promoted_42_to_fp16)[name = string("op_3197_cast_fp16")]; + tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; + bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; + tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_3197_cast_fp16)[name = string("variance_85_cast_fp16")]; + fp16 var_3200_to_fp16 = const()[name = string("op_3200_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3201_cast_fp16 = add(x = variance_85_cast_fp16, y = var_3200_to_fp16)[name = string("op_3201_cast_fp16")]; + fp32 var_3202_epsilon_0 = const()[name = string("op_3202_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3202_cast_fp16 = rsqrt(epsilon = var_3202_epsilon_0, x = var_3201_cast_fp16)[name = string("op_3202_cast_fp16")]; + tensor hidden_states_549_cast_fp16 = mul(x = hidden_states_545_cast_fp16, y = var_3202_cast_fp16)[name = string("hidden_states_549_cast_fp16")]; + tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849561024)))]; + tensor hidden_states_553_cast_fp16 = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_549_cast_fp16)[name = string("hidden_states_553_cast_fp16")]; + tensor var_3213_shape_cast_fp16 = shape(x = hidden_states_553_cast_fp16)[name = string("op_3213_shape_cast_fp16")]; + int32 gather_214 = const()[name = string("gather_214"), val = int32(1)]; + int32 gather_215_axis_0 = const()[name = string("gather_215_axis_0"), val = int32(0)]; + int32 gather_215_batch_dims_0 = const()[name = string("gather_215_batch_dims_0"), val = int32(0)]; + bool gather_215_validate_indices_0 = const()[name = string("gather_215_validate_indices_0"), val = bool(false)]; + string var_3213_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3213_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_215_to_uint16 = const()[name = string("select_215_to_uint16"), val = uint16(1)]; + tensor var_3213_shape_cast_fp16_to_uint16 = cast(dtype = var_3213_shape_cast_fp16_to_uint16_dtype_0, x = var_3213_shape_cast_fp16)[name = string("cast_11")]; + uint16 gather_215_cast_uint16 = gather(axis = gather_215_axis_0, batch_dims = gather_215_batch_dims_0, indices = select_215_to_uint16, validate_indices = gather_215_validate_indices_0, x = var_3213_shape_cast_fp16_to_uint16)[name = string("gather_215_cast_uint16")]; + string gather_215_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_215_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(849565184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851662400))))[name = string("model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_553_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(851924608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854021824))))[name = string("model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_148_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_553_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854284032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(856381248))))[name = string("model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_149_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_553_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor concat_315x = const()[name = string("concat_315x"), val = tensor([1, -1, 32, 64])]; + tensor var_3222_cast_fp16 = reshape(shape = concat_315x, x = linear_147_cast_fp16)[name = string("op_3222_cast_fp16")]; + tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_316x = const()[name = string("concat_316x"), val = tensor([1, -1, 32, 64])]; + tensor var_3225_cast_fp16 = reshape(shape = concat_316x, x = linear_148_cast_fp16)[name = string("op_3225_cast_fp16")]; + tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_317x = const()[name = string("concat_317x"), val = tensor([1, -1, 32, 64])]; + tensor var_3228_cast_fp16 = reshape(shape = concat_317x, x = linear_149_cast_fp16)[name = string("op_3228_cast_fp16")]; + tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_3222_cast_fp16)[name = string("transpose_11")]; + tensor var_3232_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3232_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3243_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3243_cast_fp16")]; + bool var_3245_interleave_0 = const()[name = string("op_3245_interleave_0"), val = bool(false)]; + tensor var_3245_cast_fp16 = concat(axis = var_69, interleave = var_3245_interleave_0, values = (var_3243_cast_fp16, x1_85_cast_fp16))[name = string("op_3245_cast_fp16")]; + tensor var_3246_cast_fp16 = mul(x = var_3245_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3246_cast_fp16")]; + tensor query_states_87_cast_fp16 = add(x = var_3232_cast_fp16, y = var_3246_cast_fp16)[name = string("query_states_87_cast_fp16")]; + tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_3225_cast_fp16)[name = string("transpose_10")]; + tensor var_3248_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3248_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3259_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_3259_cast_fp16")]; + bool var_3261_interleave_0 = const()[name = string("op_3261_interleave_0"), val = bool(false)]; + tensor var_3261_cast_fp16 = concat(axis = var_69, interleave = var_3261_interleave_0, values = (var_3259_cast_fp16, x1_87_cast_fp16))[name = string("op_3261_cast_fp16")]; + tensor var_3262_cast_fp16 = mul(x = var_3261_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3262_cast_fp16")]; + tensor k_state_43_cast_fp16 = add(x = var_3248_cast_fp16, y = var_3262_cast_fp16)[name = string("k_state_43_cast_fp16")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor concat_320_values0_0 = const()[name = string("concat_320_values0_0"), val = tensor([21])]; + int32 concat_320_axis_0 = const()[name = string("concat_320_axis_0"), val = int32(0)]; + bool concat_320_interleave_0 = const()[name = string("concat_320_interleave_0"), val = bool(false)]; + tensor concat_320 = concat(axis = concat_320_axis_0, interleave = concat_320_interleave_0, values = (concat_320_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_320")]; + tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_320, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; + tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_43_cast_fp16 = transpose(perm = v_state_43_perm_0, x = var_3228_cast_fp16)[name = string("transpose_9")]; + tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_320, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; + tensor var_3285_begin_0 = const()[name = string("op_3285_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_3285_end_0 = const()[name = string("op_3285_end_0"), val = tensor([22, 1, 32, 2048, 64])]; + tensor var_3285_end_mask_0 = const()[name = string("op_3285_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3285_squeeze_mask_0 = const()[name = string("op_3285_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3285_cast_fp16 = slice_by_index(begin = var_3285_begin_0, end = var_3285_end_0, end_mask = var_3285_end_mask_0, squeeze_mask = var_3285_squeeze_mask_0, x = coreml_update_state_90)[name = string("op_3285_cast_fp16")]; + tensor var_3288_begin_0 = const()[name = string("op_3288_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3288_end_mask_0 = const()[name = string("op_3288_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3288_cast_fp16 = slice_by_index(begin = var_3288_begin_0, end = concat_11, end_mask = var_3288_end_mask_0, x = var_3285_cast_fp16)[name = string("op_3288_cast_fp16")]; + tensor var_3290_begin_0 = const()[name = string("op_3290_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_3290_end_0 = const()[name = string("op_3290_end_0"), val = tensor([22, 1, 32, 2048, 64])]; + tensor var_3290_end_mask_0 = const()[name = string("op_3290_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3290_squeeze_mask_0 = const()[name = string("op_3290_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3290_cast_fp16 = slice_by_index(begin = var_3290_begin_0, end = var_3290_end_0, end_mask = var_3290_end_mask_0, squeeze_mask = var_3290_squeeze_mask_0, x = coreml_update_state_91)[name = string("op_3290_cast_fp16")]; + tensor var_3293_begin_0 = const()[name = string("op_3293_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3293_end_mask_0 = const()[name = string("op_3293_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3293_cast_fp16 = slice_by_index(begin = var_3293_begin_0, end = concat_11, end_mask = var_3293_end_mask_0, x = var_3290_cast_fp16)[name = string("op_3293_cast_fp16")]; + tensor var_3295_shape_cast_fp16 = shape(x = var_3288_cast_fp16)[name = string("op_3295_shape_cast_fp16")]; + int32 gather_223_axis_0 = const()[name = string("gather_223_axis_0"), val = int32(0)]; + int32 gather_223_batch_dims_0 = const()[name = string("gather_223_batch_dims_0"), val = int32(0)]; + bool gather_223_validate_indices_0 = const()[name = string("gather_223_validate_indices_0"), val = bool(false)]; + string var_3295_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3295_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_223_to_uint16 = const()[name = string("select_223_to_uint16"), val = uint16(2)]; + tensor var_3295_shape_cast_fp16_to_uint16 = cast(dtype = var_3295_shape_cast_fp16_to_uint16_dtype_0, x = var_3295_shape_cast_fp16)[name = string("cast_10")]; + uint16 gather_223_cast_uint16 = gather(axis = gather_223_axis_0, batch_dims = gather_223_batch_dims_0, indices = select_223_to_uint16, validate_indices = gather_223_validate_indices_0, x = var_3295_shape_cast_fp16_to_uint16)[name = string("gather_223_cast_uint16")]; + string gather_223_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_223_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_328_values0_0 = const()[name = string("concat_328_values0_0"), val = int32(1)]; + int32 concat_328_values1_0 = const()[name = string("concat_328_values1_0"), val = int32(1)]; + int32 concat_328_values2_0 = const()[name = string("concat_328_values2_0"), val = int32(0)]; + int32 concat_328_axis_0 = const()[name = string("concat_328_axis_0"), val = int32(0)]; + bool concat_328_interleave_0 = const()[name = string("concat_328_interleave_0"), val = bool(false)]; + int32 gather_223_cast_uint16_to_int32 = cast(dtype = gather_223_cast_uint16_to_int32_dtype_0, x = gather_223_cast_uint16)[name = string("cast_9")]; + tensor concat_328 = concat(axis = concat_328_axis_0, interleave = concat_328_interleave_0, values = (concat_328_values0_0, concat_328_values1_0, concat_328_values2_0, gather_223_cast_uint16_to_int32))[name = string("concat_328")]; + tensor causal_mask_45_begin_0 = const()[name = string("causal_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_45_end_mask_0 = const()[name = string("causal_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_45_cast_fp16 = slice_by_index(begin = causal_mask_45_begin_0, end = concat_328, end_mask = causal_mask_45_end_mask_0, x = causal_mask)[name = string("causal_mask_45_cast_fp16")]; + tensor attn_output_85_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_45_cast_fp16, key = var_3288_cast_fp16, query = query_states_87_cast_fp16, value = var_3293_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_3301_perm_0 = const()[name = string("op_3301_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_329_axis_0 = const()[name = string("concat_329_axis_0"), val = int32(0)]; + bool concat_329_interleave_0 = const()[name = string("concat_329_interleave_0"), val = bool(false)]; + int32 gather_215_cast_uint16_to_int32 = cast(dtype = gather_215_cast_uint16_to_int32_dtype_0, x = gather_215_cast_uint16)[name = string("cast_8")]; + tensor concat_329 = concat(axis = concat_329_axis_0, interleave = concat_329_interleave_0, values = (gather_214, gather_215_cast_uint16_to_int32, var_69))[name = string("concat_329")]; + tensor var_3301_cast_fp16 = transpose(perm = var_3301_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_8")]; + tensor input_169_cast_fp16 = reshape(shape = concat_329, x = var_3301_cast_fp16)[name = string("input_169_cast_fp16")]; + tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(856643456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858740672))))[name = string("model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_150_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized, x = input_169_cast_fp16)[name = string("linear_150_cast_fp16")]; + tensor hidden_states_561_cast_fp16 = add(x = hidden_states_545_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_561_cast_fp16")]; + fp16 var_64_promoted_43_to_fp16 = const()[name = string("op_64_promoted_43_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3310_cast_fp16 = pow(x = hidden_states_561_cast_fp16, y = var_64_promoted_43_to_fp16)[name = string("op_3310_cast_fp16")]; + tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; + bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; + tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_3310_cast_fp16)[name = string("variance_87_cast_fp16")]; + fp16 var_3313_to_fp16 = const()[name = string("op_3313_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3314_cast_fp16 = add(x = variance_87_cast_fp16, y = var_3313_to_fp16)[name = string("op_3314_cast_fp16")]; + fp32 var_3315_epsilon_0 = const()[name = string("op_3315_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3315_cast_fp16 = rsqrt(epsilon = var_3315_epsilon_0, x = var_3314_cast_fp16)[name = string("op_3315_cast_fp16")]; + tensor hidden_states_565_cast_fp16 = mul(x = hidden_states_561_cast_fp16, y = var_3315_cast_fp16)[name = string("hidden_states_565_cast_fp16")]; + tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859002880)))]; + tensor input_171_cast_fp16 = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_565_cast_fp16)[name = string("input_171_cast_fp16")]; + tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859007040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(867395712))))[name = string("model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor var_3327_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_3327_cast_fp16")]; + tensor model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(868444352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876833024))))[name = string("model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor input_175_cast_fp16 = mul(x = var_3327_cast_fp16, y = linear_152_cast_fp16)[name = string("input_175_cast_fp16")]; + tensor model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877881664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(886270336))))[name = string("model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_153_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor hidden_states_571_cast_fp16 = add(x = hidden_states_561_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_571_cast_fp16")]; + fp16 var_64_promoted_44_to_fp16 = const()[name = string("op_64_promoted_44_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3340_cast_fp16 = pow(x = hidden_states_571_cast_fp16, y = var_64_promoted_44_to_fp16)[name = string("op_3340_cast_fp16")]; + tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; + bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; + tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_3340_cast_fp16)[name = string("variance_89_cast_fp16")]; + fp16 var_3343_to_fp16 = const()[name = string("op_3343_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3344_cast_fp16 = add(x = variance_89_cast_fp16, y = var_3343_to_fp16)[name = string("op_3344_cast_fp16")]; + fp32 var_3345_epsilon_0 = const()[name = string("op_3345_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3345_cast_fp16 = rsqrt(epsilon = var_3345_epsilon_0, x = var_3344_cast_fp16)[name = string("op_3345_cast_fp16")]; + tensor hidden_states_575_cast_fp16 = mul(x = hidden_states_571_cast_fp16, y = var_3345_cast_fp16)[name = string("hidden_states_575_cast_fp16")]; + tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(887318976)))]; + tensor hidden_states_579_cast_fp16 = mul(x = model_model_layers_22_input_layernorm_weight_to_fp16, y = hidden_states_575_cast_fp16)[name = string("hidden_states_579_cast_fp16")]; + tensor var_3356_shape_cast_fp16 = shape(x = hidden_states_579_cast_fp16)[name = string("op_3356_shape_cast_fp16")]; + int32 gather_224 = const()[name = string("gather_224"), val = int32(1)]; + int32 gather_225_axis_0 = const()[name = string("gather_225_axis_0"), val = int32(0)]; + int32 gather_225_batch_dims_0 = const()[name = string("gather_225_batch_dims_0"), val = int32(0)]; + bool gather_225_validate_indices_0 = const()[name = string("gather_225_validate_indices_0"), val = bool(false)]; + string var_3356_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3356_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_225_to_uint16 = const()[name = string("select_225_to_uint16"), val = uint16(1)]; + tensor var_3356_shape_cast_fp16_to_uint16 = cast(dtype = var_3356_shape_cast_fp16_to_uint16_dtype_0, x = var_3356_shape_cast_fp16)[name = string("cast_7")]; + uint16 gather_225_cast_uint16 = gather(axis = gather_225_axis_0, batch_dims = gather_225_batch_dims_0, indices = select_225_to_uint16, validate_indices = gather_225_validate_indices_0, x = var_3356_shape_cast_fp16_to_uint16)[name = string("gather_225_cast_uint16")]; + string gather_225_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_225_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(887323136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889420352))))[name = string("model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_579_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889682560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(891779776))))[name = string("model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_155_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_579_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892041984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(894139200))))[name = string("model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_156_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_579_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 32, 64])]; + tensor var_3365_cast_fp16 = reshape(shape = concat_330x, x = linear_154_cast_fp16)[name = string("op_3365_cast_fp16")]; + tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 32, 64])]; + tensor var_3368_cast_fp16 = reshape(shape = concat_331x, x = linear_155_cast_fp16)[name = string("op_3368_cast_fp16")]; + tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_332x = const()[name = string("concat_332x"), val = tensor([1, -1, 32, 64])]; + tensor var_3371_cast_fp16 = reshape(shape = concat_332x, x = linear_156_cast_fp16)[name = string("op_3371_cast_fp16")]; + tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_3365_cast_fp16)[name = string("transpose_7")]; + tensor var_3375_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3375_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3386_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_3386_cast_fp16")]; + bool var_3388_interleave_0 = const()[name = string("op_3388_interleave_0"), val = bool(false)]; + tensor var_3388_cast_fp16 = concat(axis = var_69, interleave = var_3388_interleave_0, values = (var_3386_cast_fp16, x1_89_cast_fp16))[name = string("op_3388_cast_fp16")]; + tensor var_3389_cast_fp16 = mul(x = var_3388_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3389_cast_fp16")]; + tensor query_states_91_cast_fp16 = add(x = var_3375_cast_fp16, y = var_3389_cast_fp16)[name = string("query_states_91_cast_fp16")]; + tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_3368_cast_fp16)[name = string("transpose_6")]; + tensor var_3391_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3391_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3402_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_3402_cast_fp16")]; + bool var_3404_interleave_0 = const()[name = string("op_3404_interleave_0"), val = bool(false)]; + tensor var_3404_cast_fp16 = concat(axis = var_69, interleave = var_3404_interleave_0, values = (var_3402_cast_fp16, x1_91_cast_fp16))[name = string("op_3404_cast_fp16")]; + tensor var_3405_cast_fp16 = mul(x = var_3404_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3405_cast_fp16")]; + tensor k_state_45_cast_fp16 = add(x = var_3391_cast_fp16, y = var_3405_cast_fp16)[name = string("k_state_45_cast_fp16")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([22])]; + int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; + bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; + tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_335")]; + tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_335, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; + tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_45_cast_fp16 = transpose(perm = v_state_45_perm_0, x = var_3371_cast_fp16)[name = string("transpose_5")]; + tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_335, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; + tensor var_3428_begin_0 = const()[name = string("op_3428_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_3428_end_0 = const()[name = string("op_3428_end_0"), val = tensor([23, 1, 32, 2048, 64])]; + tensor var_3428_end_mask_0 = const()[name = string("op_3428_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3428_squeeze_mask_0 = const()[name = string("op_3428_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3428_cast_fp16 = slice_by_index(begin = var_3428_begin_0, end = var_3428_end_0, end_mask = var_3428_end_mask_0, squeeze_mask = var_3428_squeeze_mask_0, x = coreml_update_state_92)[name = string("op_3428_cast_fp16")]; + tensor var_3431_begin_0 = const()[name = string("op_3431_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3431_end_mask_0 = const()[name = string("op_3431_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3431_cast_fp16 = slice_by_index(begin = var_3431_begin_0, end = concat_11, end_mask = var_3431_end_mask_0, x = var_3428_cast_fp16)[name = string("op_3431_cast_fp16")]; + tensor var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_3433_end_0 = const()[name = string("op_3433_end_0"), val = tensor([23, 1, 32, 2048, 64])]; + tensor var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3433_squeeze_mask_0 = const()[name = string("op_3433_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = var_3433_end_0, end_mask = var_3433_end_mask_0, squeeze_mask = var_3433_squeeze_mask_0, x = coreml_update_state_93)[name = string("op_3433_cast_fp16")]; + tensor var_3436_begin_0 = const()[name = string("op_3436_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3436_end_mask_0 = const()[name = string("op_3436_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3436_cast_fp16 = slice_by_index(begin = var_3436_begin_0, end = concat_11, end_mask = var_3436_end_mask_0, x = var_3433_cast_fp16)[name = string("op_3436_cast_fp16")]; + tensor var_3438_shape_cast_fp16 = shape(x = var_3431_cast_fp16)[name = string("op_3438_shape_cast_fp16")]; + int32 gather_233_axis_0 = const()[name = string("gather_233_axis_0"), val = int32(0)]; + int32 gather_233_batch_dims_0 = const()[name = string("gather_233_batch_dims_0"), val = int32(0)]; + bool gather_233_validate_indices_0 = const()[name = string("gather_233_validate_indices_0"), val = bool(false)]; + string var_3438_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3438_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_233_to_uint16 = const()[name = string("select_233_to_uint16"), val = uint16(2)]; + tensor var_3438_shape_cast_fp16_to_uint16 = cast(dtype = var_3438_shape_cast_fp16_to_uint16_dtype_0, x = var_3438_shape_cast_fp16)[name = string("cast_6")]; + uint16 gather_233_cast_uint16 = gather(axis = gather_233_axis_0, batch_dims = gather_233_batch_dims_0, indices = select_233_to_uint16, validate_indices = gather_233_validate_indices_0, x = var_3438_shape_cast_fp16_to_uint16)[name = string("gather_233_cast_uint16")]; + string gather_233_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_233_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_343_values0_0 = const()[name = string("concat_343_values0_0"), val = int32(1)]; + int32 concat_343_values1_0 = const()[name = string("concat_343_values1_0"), val = int32(1)]; + int32 concat_343_values2_0 = const()[name = string("concat_343_values2_0"), val = int32(0)]; + int32 concat_343_axis_0 = const()[name = string("concat_343_axis_0"), val = int32(0)]; + bool concat_343_interleave_0 = const()[name = string("concat_343_interleave_0"), val = bool(false)]; + int32 gather_233_cast_uint16_to_int32 = cast(dtype = gather_233_cast_uint16_to_int32_dtype_0, x = gather_233_cast_uint16)[name = string("cast_5")]; + tensor concat_343 = concat(axis = concat_343_axis_0, interleave = concat_343_interleave_0, values = (concat_343_values0_0, concat_343_values1_0, concat_343_values2_0, gather_233_cast_uint16_to_int32))[name = string("concat_343")]; + tensor causal_mask_47_begin_0 = const()[name = string("causal_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_47_end_mask_0 = const()[name = string("causal_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_47_cast_fp16 = slice_by_index(begin = causal_mask_47_begin_0, end = concat_343, end_mask = causal_mask_47_end_mask_0, x = causal_mask)[name = string("causal_mask_47_cast_fp16")]; + tensor attn_output_89_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_47_cast_fp16, key = var_3431_cast_fp16, query = query_states_91_cast_fp16, value = var_3436_cast_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor var_3444_perm_0 = const()[name = string("op_3444_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_344_axis_0 = const()[name = string("concat_344_axis_0"), val = int32(0)]; + bool concat_344_interleave_0 = const()[name = string("concat_344_interleave_0"), val = bool(false)]; + int32 gather_225_cast_uint16_to_int32 = cast(dtype = gather_225_cast_uint16_to_int32_dtype_0, x = gather_225_cast_uint16)[name = string("cast_4")]; + tensor concat_344 = concat(axis = concat_344_axis_0, interleave = concat_344_interleave_0, values = (gather_224, gather_225_cast_uint16_to_int32, var_69))[name = string("concat_344")]; + tensor var_3444_cast_fp16 = transpose(perm = var_3444_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_4")]; + tensor input_177_cast_fp16 = reshape(shape = concat_344, x = var_3444_cast_fp16)[name = string("input_177_cast_fp16")]; + tensor model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(894401408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896498624))))[name = string("model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_157_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor hidden_states_587_cast_fp16 = add(x = hidden_states_571_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_587_cast_fp16")]; + fp16 var_64_promoted_45_to_fp16 = const()[name = string("op_64_promoted_45_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3453_cast_fp16 = pow(x = hidden_states_587_cast_fp16, y = var_64_promoted_45_to_fp16)[name = string("op_3453_cast_fp16")]; + tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; + bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; + tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_3453_cast_fp16)[name = string("variance_91_cast_fp16")]; + fp16 var_3456_to_fp16 = const()[name = string("op_3456_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3457_cast_fp16 = add(x = variance_91_cast_fp16, y = var_3456_to_fp16)[name = string("op_3457_cast_fp16")]; + fp32 var_3458_epsilon_0 = const()[name = string("op_3458_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3458_cast_fp16 = rsqrt(epsilon = var_3458_epsilon_0, x = var_3457_cast_fp16)[name = string("op_3458_cast_fp16")]; + tensor hidden_states_591_cast_fp16 = mul(x = hidden_states_587_cast_fp16, y = var_3458_cast_fp16)[name = string("hidden_states_591_cast_fp16")]; + tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896760832)))]; + tensor input_179_cast_fp16 = mul(x = model_model_layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_591_cast_fp16)[name = string("input_179_cast_fp16")]; + tensor model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896764992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905153664))))[name = string("model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_158_cast_fp16")]; + tensor var_3470_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_3470_cast_fp16")]; + tensor model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(906202304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914590976))))[name = string("model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor input_183_cast_fp16 = mul(x = var_3470_cast_fp16, y = linear_159_cast_fp16)[name = string("input_183_cast_fp16")]; + tensor model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(915639616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(924028288))))[name = string("model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_160_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized, x = input_183_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor hidden_states_597_cast_fp16 = add(x = hidden_states_587_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_597_cast_fp16")]; + fp16 var_64_promoted_46_to_fp16 = const()[name = string("op_64_promoted_46_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3483_cast_fp16 = pow(x = hidden_states_597_cast_fp16, y = var_64_promoted_46_to_fp16)[name = string("op_3483_cast_fp16")]; + tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; + bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; + tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_3483_cast_fp16)[name = string("variance_93_cast_fp16")]; + fp16 var_3486_to_fp16 = const()[name = string("op_3486_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3487_cast_fp16 = add(x = variance_93_cast_fp16, y = var_3486_to_fp16)[name = string("op_3487_cast_fp16")]; + fp32 var_3488_epsilon_0 = const()[name = string("op_3488_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3488_cast_fp16 = rsqrt(epsilon = var_3488_epsilon_0, x = var_3487_cast_fp16)[name = string("op_3488_cast_fp16")]; + tensor hidden_states_601_cast_fp16 = mul(x = hidden_states_597_cast_fp16, y = var_3488_cast_fp16)[name = string("hidden_states_601_cast_fp16")]; + tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925076928)))]; + tensor hidden_states_605_cast_fp16 = mul(x = model_model_layers_23_input_layernorm_weight_to_fp16, y = hidden_states_601_cast_fp16)[name = string("hidden_states_605_cast_fp16")]; + tensor var_3499_shape_cast_fp16 = shape(x = hidden_states_605_cast_fp16)[name = string("op_3499_shape_cast_fp16")]; + int32 gather_234 = const()[name = string("gather_234"), val = int32(1)]; + int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; + int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; + bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; + string var_3499_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3499_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(1)]; + tensor var_3499_shape_cast_fp16_to_uint16 = cast(dtype = var_3499_shape_cast_fp16_to_uint16_dtype_0, x = var_3499_shape_cast_fp16)[name = string("cast_3")]; + uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_3499_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; + string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925081088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927178304))))[name = string("model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_605_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927440512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929537728))))[name = string("model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_162_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_605_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929799936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931897152))))[name = string("model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_163_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_605_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor concat_345x = const()[name = string("concat_345x"), val = tensor([1, -1, 32, 64])]; + tensor var_3508_cast_fp16 = reshape(shape = concat_345x, x = linear_161_cast_fp16)[name = string("op_3508_cast_fp16")]; + tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_346x = const()[name = string("concat_346x"), val = tensor([1, -1, 32, 64])]; + tensor var_3511_cast_fp16 = reshape(shape = concat_346x, x = linear_162_cast_fp16)[name = string("op_3511_cast_fp16")]; + tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 32, 64])]; + tensor var_3514_cast_fp16 = reshape(shape = concat_347x, x = linear_163_cast_fp16)[name = string("op_3514_cast_fp16")]; + tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_3508_cast_fp16)[name = string("transpose_3")]; + tensor var_3518_cast_fp16 = mul(x = q_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3518_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3529_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_3529_cast_fp16")]; + bool var_3531_interleave_0 = const()[name = string("op_3531_interleave_0"), val = bool(false)]; + tensor var_3531_cast_fp16 = concat(axis = var_69, interleave = var_3531_interleave_0, values = (var_3529_cast_fp16, x1_93_cast_fp16))[name = string("op_3531_cast_fp16")]; + tensor var_3532_cast_fp16 = mul(x = var_3531_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3532_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_3518_cast_fp16, y = var_3532_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_3511_cast_fp16)[name = string("transpose_2")]; + tensor var_3534_cast_fp16 = mul(x = k_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3534_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 32, 0, 32])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 32, 0, 64])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3545_cast_fp16 = mul(x = x2_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_3545_cast_fp16")]; + bool var_3547_interleave_0 = const()[name = string("op_3547_interleave_0"), val = bool(false)]; + tensor var_3547_cast_fp16 = concat(axis = var_69, interleave = var_3547_interleave_0, values = (var_3545_cast_fp16, x1_cast_fp16))[name = string("op_3547_cast_fp16")]; + tensor var_3548_cast_fp16 = mul(x = var_3547_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3548_cast_fp16")]; + tensor k_state_cast_fp16 = add(x = var_3534_cast_fp16, y = var_3548_cast_fp16)[name = string("k_state_cast_fp16")]; + tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; + tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; + tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; + tensor concat_350_values0_0 = const()[name = string("concat_350_values0_0"), val = tensor([23])]; + int32 concat_350_axis_0 = const()[name = string("concat_350_axis_0"), val = int32(0)]; + bool concat_350_interleave_0 = const()[name = string("concat_350_interleave_0"), val = bool(false)]; + tensor concat_350 = concat(axis = concat_350_axis_0, interleave = concat_350_interleave_0, values = (concat_350_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_350")]; + tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_350, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; + tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_3514_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_350, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; + tensor var_3571_begin_0 = const()[name = string("op_3571_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_3571_end_0 = const()[name = string("op_3571_end_0"), val = tensor([24, 1, 32, 2048, 64])]; + tensor var_3571_end_mask_0 = const()[name = string("op_3571_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3571_squeeze_mask_0 = const()[name = string("op_3571_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3571_cast_fp16 = slice_by_index(begin = var_3571_begin_0, end = var_3571_end_0, end_mask = var_3571_end_mask_0, squeeze_mask = var_3571_squeeze_mask_0, x = coreml_update_state_94)[name = string("op_3571_cast_fp16")]; + tensor var_3574_begin_0 = const()[name = string("op_3574_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3574_end_mask_0 = const()[name = string("op_3574_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3574_cast_fp16 = slice_by_index(begin = var_3574_begin_0, end = concat_11, end_mask = var_3574_end_mask_0, x = var_3571_cast_fp16)[name = string("op_3574_cast_fp16")]; + tensor var_3576_begin_0 = const()[name = string("op_3576_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_3576_end_0 = const()[name = string("op_3576_end_0"), val = tensor([24, 1, 32, 2048, 64])]; + tensor var_3576_end_mask_0 = const()[name = string("op_3576_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3576_squeeze_mask_0 = const()[name = string("op_3576_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3576_cast_fp16 = slice_by_index(begin = var_3576_begin_0, end = var_3576_end_0, end_mask = var_3576_end_mask_0, squeeze_mask = var_3576_squeeze_mask_0, x = coreml_update_state_95)[name = string("op_3576_cast_fp16")]; + tensor var_3579_begin_0 = const()[name = string("op_3579_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3579_end_mask_0 = const()[name = string("op_3579_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3579_cast_fp16 = slice_by_index(begin = var_3579_begin_0, end = concat_11, end_mask = var_3579_end_mask_0, x = var_3576_cast_fp16)[name = string("op_3579_cast_fp16")]; + tensor var_3581_shape_cast_fp16 = shape(x = var_3574_cast_fp16)[name = string("op_3581_shape_cast_fp16")]; + int32 gather_243_axis_0 = const()[name = string("gather_243_axis_0"), val = int32(0)]; + int32 gather_243_batch_dims_0 = const()[name = string("gather_243_batch_dims_0"), val = int32(0)]; + bool gather_243_validate_indices_0 = const()[name = string("gather_243_validate_indices_0"), val = bool(false)]; + string var_3581_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3581_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_243_to_uint16 = const()[name = string("select_243_to_uint16"), val = uint16(2)]; + tensor var_3581_shape_cast_fp16_to_uint16 = cast(dtype = var_3581_shape_cast_fp16_to_uint16_dtype_0, x = var_3581_shape_cast_fp16)[name = string("cast_2")]; + uint16 gather_243_cast_uint16 = gather(axis = gather_243_axis_0, batch_dims = gather_243_batch_dims_0, indices = select_243_to_uint16, validate_indices = gather_243_validate_indices_0, x = var_3581_shape_cast_fp16_to_uint16)[name = string("gather_243_cast_uint16")]; + string gather_243_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_243_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_358_values0_0 = const()[name = string("concat_358_values0_0"), val = int32(1)]; + int32 concat_358_values1_0 = const()[name = string("concat_358_values1_0"), val = int32(1)]; + int32 concat_358_values2_0 = const()[name = string("concat_358_values2_0"), val = int32(0)]; + int32 concat_358_axis_0 = const()[name = string("concat_358_axis_0"), val = int32(0)]; + bool concat_358_interleave_0 = const()[name = string("concat_358_interleave_0"), val = bool(false)]; + int32 gather_243_cast_uint16_to_int32 = cast(dtype = gather_243_cast_uint16_to_int32_dtype_0, x = gather_243_cast_uint16)[name = string("cast_1")]; + tensor concat_358 = concat(axis = concat_358_axis_0, interleave = concat_358_interleave_0, values = (concat_358_values0_0, concat_358_values1_0, concat_358_values2_0, gather_243_cast_uint16_to_int32))[name = string("concat_358")]; + tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_cast_fp16 = slice_by_index(begin = causal_mask_begin_0, end = concat_358, end_mask = causal_mask_end_mask_0, x = causal_mask)[name = string("causal_mask_cast_fp16")]; + tensor attn_output_93_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = var_3574_cast_fp16, query = query_states_cast_fp16, value = var_3579_cast_fp16)[name = string("attn_output_93_cast_fp16")]; + tensor var_3587_perm_0 = const()[name = string("op_3587_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_359_axis_0 = const()[name = string("concat_359_axis_0"), val = int32(0)]; + bool concat_359_interleave_0 = const()[name = string("concat_359_interleave_0"), val = bool(false)]; + int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_0")]; + tensor concat_359 = concat(axis = concat_359_axis_0, interleave = concat_359_interleave_0, values = (gather_234, gather_235_cast_uint16_to_int32, var_69))[name = string("concat_359")]; + tensor var_3587_cast_fp16 = transpose(perm = var_3587_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_0")]; + tensor input_185_cast_fp16 = reshape(shape = concat_359, x = var_3587_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(932159360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934256576))))[name = string("model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_164_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor hidden_states_613_cast_fp16 = add(x = hidden_states_597_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_613_cast_fp16")]; + fp16 var_64_promoted_47_to_fp16 = const()[name = string("op_64_promoted_47_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3596_cast_fp16 = pow(x = hidden_states_613_cast_fp16, y = var_64_promoted_47_to_fp16)[name = string("op_3596_cast_fp16")]; + tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; + bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; + tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_3596_cast_fp16)[name = string("variance_95_cast_fp16")]; + fp16 var_3599_to_fp16 = const()[name = string("op_3599_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3600_cast_fp16 = add(x = variance_95_cast_fp16, y = var_3599_to_fp16)[name = string("op_3600_cast_fp16")]; + fp32 var_3601_epsilon_0 = const()[name = string("op_3601_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3601_cast_fp16 = rsqrt(epsilon = var_3601_epsilon_0, x = var_3600_cast_fp16)[name = string("op_3601_cast_fp16")]; + tensor hidden_states_617_cast_fp16 = mul(x = hidden_states_613_cast_fp16, y = var_3601_cast_fp16)[name = string("hidden_states_617_cast_fp16")]; + tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934518784)))]; + tensor input_187_cast_fp16 = mul(x = model_model_layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_617_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934522944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(942911616))))[name = string("model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor var_3613_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_3613_cast_fp16")]; + tensor model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943960256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(952348928))))[name = string("model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_166_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_3613_cast_fp16, y = linear_166_cast_fp16)[name = string("input_191_cast_fp16")]; + tensor model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953397568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961786240))))[name = string("model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_167_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor hidden_states_623_cast_fp16 = add(x = hidden_states_613_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_623_cast_fp16")]; + fp16 var_64_promoted_48_to_fp16 = const()[name = string("op_64_promoted_48_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3622_cast_fp16 = pow(x = hidden_states_623_cast_fp16, y = var_64_promoted_48_to_fp16)[name = string("op_3622_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_3622_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_3625_to_fp16 = const()[name = string("op_3625_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3626_cast_fp16 = add(x = variance_cast_fp16, y = var_3625_to_fp16)[name = string("op_3626_cast_fp16")]; + fp32 var_3627_epsilon_0 = const()[name = string("op_3627_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3627_cast_fp16 = rsqrt(epsilon = var_3627_epsilon_0, x = var_3626_cast_fp16)[name = string("op_3627_cast_fp16")]; + tensor hidden_states_627_cast_fp16 = mul(x = hidden_states_623_cast_fp16, y = var_3627_cast_fp16)[name = string("hidden_states_627_cast_fp16")]; + tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(962834880)))]; + tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_627_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor linear_168_bias_0_to_fp16 = const()[name = string("linear_168_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(962839040)))]; + tensor logits = linear(bias = linear_168_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16_quantized, x = hidden_states_cast_fp16)[name = string("linear_168_cast_fp16")]; + } -> (logits); +} \ No newline at end of file