# Passthrough (layer-stacking) merge of two 70B models into one deeper model.
#
# The `const_tag` entries exist only to carry YAML anchors that are reused
# below through aliases. The key is repeated on purpose: only the anchors
# matter, not the value that ends up stored under `const_tag`.
#
# NOTE(review): this file was reconstructed from a copy in which the
# indentation, every alias reference (`*NAME`), the anchor on
# `scale-filter-env`, and the merge-key targets had been lost. The scale
# values and the merge-key target follow directly from the anchor names.
# The MODEL1/MODEL2 assignment per slice is an ASSUMPTION (alternating the
# two models over the duplicated layer ranges) - confirm against the
# original recipe before running the merge.
const_tag: &MODEL1 jukofyork/Dawn-Miqu-70B
const_tag: &MODEL2 jukofyork/Dark-Miqu-70B

# Applied to both q_proj and k_proj, so the combined factor on their product
# is 0.8408964153^2 = sqrt(1/2).
const_tag: &QK_ATTENUATION_FACTOR 0.8408964153  # sqrt(sqrt(1/2))
const_tag: &MLP_DOWN_SCALE_FACTOR 0.7071067812  # sqrt(1/2)

# Shared per-slice parameters, merged into a slice source with
# `<<: *scale_filter_env`.
scale-filter-env: &scale_filter_env
  parameters:
    scale:
      - filter: q_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: k_proj
        value: *QK_ATTENUATION_FACTOR
      - filter: down_proj
        value: *MLP_DOWN_SCALE_FACTOR
      # Fallback entry without a filter: everything else is left unscaled.
      - value: 1.0

slices:
  # First and last slices are taken unscaled (no merge key).
  - sources:
      - model: *MODEL1
        layer_range: [0, 20]
  # Layer ranges [20, 40] and [40, 60] each appear twice; those four slices
  # receive the scaled parameters defined above.
  - sources:
      - model: *MODEL1  # NOTE(review): model choice assumed - confirm
        layer_range: [20, 40]
        <<: *scale_filter_env
  - sources:
      - model: *MODEL2  # NOTE(review): model choice assumed - confirm
        layer_range: [20, 40]
        <<: *scale_filter_env
  - sources:
      - model: *MODEL1  # NOTE(review): model choice assumed - confirm
        layer_range: [40, 60]
        <<: *scale_filter_env
  - sources:
      - model: *MODEL2  # NOTE(review): model choice assumed - confirm
        layer_range: [40, 60]
        <<: *scale_filter_env
  - sources:
      - model: *MODEL1  # NOTE(review): model choice assumed - confirm
        layer_range: [60, 80]

merge_method: passthrough
dtype: float16