---
base_model: []
library_name: transformers
tags:
- mergekit
- merge
---

# L3-Steno-Maid-Black-LARGE-s1-36-sxy

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
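To re-run a merge like this one, mergekit can be driven from its `mergekit-yaml` command line or from Python. The following is a rough sketch only, assuming mergekit's documented Python entry points (`MergeConfiguration`, `run_merge`, `MergeOptions`) and that the configuration from this card has been saved as `config.yml`; the output path is a placeholder, and the exact `MergeOptions` fields may vary by mergekit version:

```python
# Rough sketch of running the merge from Python, assuming the
# mergekit package (pip install mergekit). config.yml holds the
# YAML shown in the Configuration section below.
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

with open("config.yml", "r", encoding="utf-8") as fp:
    config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    config,
    out_path="./L3-Steno-Maid-Black-LARGE",  # placeholder output dir
    options=MergeOptions(copy_tokenizer=True, lazy_unpickle=True),
)
```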
## Merge Details

### Merge Method

This model was merged using the passthrough merge method.
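Passthrough performs no weight averaging: it simply stacks the listed layer slices, in order, into a single deeper model (a "frankenmerge"), so overlapping ranges mean some layers appear more than once. The depth of the result follows directly from the slice lengths in the configuration below, as this sketch shows (short names stand in for the full model paths):

```python
# Minimal sketch: the merged depth is the sum of the slice lengths
# from the configuration below. Only the layer ranges matter here.
slices = [
    ("Stheno", 0, 14),
    ("Lumimaid", 8, 20),
    ("Jamet", 12, 24),
    ("Stheno", 14, 20),
    ("Stheno", 20, 25),
    ("Stheno", 25, 27),
    ("Stheno", 27, 28),
    ("Lumimaid", 20, 25),
    ("Lumimaid", 25, 27),
    ("Lumimaid", 27, 31),
    ("Jamet", 24, 31),
] + [("Jamet", 31, 32)] * 5  # final layer repeated five times

depth = sum(end - start for _, start, end in slices)
print(depth)  # 75 layers, versus 32 in each 8B source model
```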
### Models Merged

The following models were included in the merge:
- G:/7B/L3-Lumimaid-8B-v0.1-OAS
- G:/7B/L3-Jamet-8B-MK.V-Blackroot
- G:/7B/L3-8B-Stheno-v3.2
### Configuration

The following YAML configuration was used to produce this model:
```yaml
# 32 layers -> VS 40
#models:
# - model: G:/7B/L3-8B-Stheno-v3.2
# - model: G:/7B/Llama-3-Lumimaid-8B-v0.1-OAS
# - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
#merge_method: model_stock
#base_model: G:/7B/L3-8B-Stheno-v3.2
#dtype: float32
slices:
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [0, 14]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [8, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [12, 24]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [14, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 0.8
            - filter: down_proj
              value: 0.8
            - value: 0.8
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [20, 25]
        parameters:
          scale:
            - filter: o_proj
              value: 0.8
            - filter: down_proj
              value: 0.8
            - value: 0.8
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [25, 27]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6
            - filter: down_proj
              value: 0.6
            - value: 1
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [27, 28]
        parameters:
          scale:
            - filter: o_proj
              value: 0.9
            - filter: down_proj
              value: 0.9
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [20, 25]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [25, 27]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6
            - filter: down_proj
              value: 0.6
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [27, 31]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [24, 31]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.3333333333333
            - filter: down_proj
              value: 0.3333333333333
            - value: 0.3333333333333
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.4444444444444
            - filter: down_proj
              value: 0.4444444444444
            - value: 0.4444444444444
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.5555555555555
            - filter: down_proj
              value: 0.5555555555555
            - value: 0.5555555555555
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6666666666666
            - filter: down_proj
              value: 0.6666666666666
            - value: 0.6666666666666
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.7777777777777
            - filter: down_proj
              value: 0.7777777777777
            - value: 0.8888888888888
merge_method: passthrough
dtype: float16
```
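Two details of the configuration are worth calling out. In each `scale` list, entries with a `filter` apply a per-tensor multiplier to weights matching that name (here the attention output projection `o_proj` and the MLP `down_proj`), while the bare `value` entry is the default for all remaining tensors; this lets duplicated slices be blended in at reduced strength rather than at full weight. In the same spirit, the final layer (31) of L3-Jamet-8B-MK.V-Blackroot is stacked five times with scales ramping from roughly 0.33 up to 0.78 (0.89 for the default), easing the repeated block in gradually.

The merged model loads like any other Llama-3-architecture checkpoint. A minimal usage sketch, assuming a placeholder repo id for wherever the merged weights are published:

```python
# Minimal sketch of loading and prompting the merged model with
# transformers; the repo id below is a placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-name/L3-Steno-Maid-Black-LARGE-s1-36-sxy"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # matches the merge dtype
    device_map="auto",
)

prompt = "Write a short, moody scene set in a rain-soaked city."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```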