---
# mergekit configuration: SLERP merge of Reflection-Llama-3.1-70B with
# Meta-Llama-3.1-70B-Instruct.

# Merge method, defined at the top level of the config.
merge_method: slerp

# One output slice that takes the same layer range from both source models.
slices:
  - sources:
      - model: mattshumer/Reflection-Llama-3.1-70B
        # Adjust the layer range as needed.
        # NOTE(review): Llama-3.1-70B checkpoints are published with 80
        # decoder layers, so [0, 40] presumably keeps only the first half of
        # each model - confirm that this truncation is intended.
        layer_range: [0, 40]
      - model: meta-llama/Meta-Llama-3.1-70B-Instruct
        layer_range: [0, 40]

# Base model for the merge; it is the first of the two sources above.
# Declared at the top level, following mergekit's documented SLERP example
# layout - TODO confirm against the mergekit version in use.
base_model: mattshumer/Reflection-Llama-3.1-70B

parameters:
  # Interpolation factor. Per mergekit's SLERP documentation, t = 0 yields the
  # base model and t = 1 yields the other model; a list of values is treated
  # as a gradient spread across the layer range.
  t:
    # Weights applied to self-attention tensors.
    - filter: self_attn
      value: [0.1, 0.5, 0.4, 0.8, 1]
    # Weights applied to MLP tensors.
    - filter: mlp
      value: [0.9, 0.6, 0.7, 0.4, 0.2]
    # General merge weight (entry without a filter).
    - value: 0.7

# Keep bfloat16 for TPU efficiency.
dtype: bfloat16