DavidAU's picture
Upload folder using huggingface_hub
88b165b verified
|
raw
history blame
4.92 kB
metadata
base_model: []
library_name: transformers
tags:
  - mergekit
  - merge

L3-Steno-Maid-Black-LARGE-s1-36-sxy

This is a merge of pre-trained language models created using mergekit.

Merge Details

Merge Method

This model was merged using the passthrough merge method.

Models Merged

The following models were included in the merge:

  • G:/7B/L3-Lumimaid-8B-v0.1-OAS
  • G:/7B/L3-Jamet-8B-MK.V-Blackroot
  • G:/7B/L3-8B-Stheno-v3.2

Configuration

The following YAML configuration was used to produce this model:

# 32 layers -> VS 40
# Disabled alternative: a model_stock merge of the same three models.
# NOTE(review): this commented block names Llama-3-Lumimaid-8B-v0.1-OAS,
# while the active config below uses L3-Lumimaid-8B-v0.1-OAS — confirm
# both paths point at the same checkpoint.
#models:
#  - model: G:/7B/L3-8B-Stheno-v3.2
#  - model: G:/7B/Llama-3-Lumimaid-8B-v0.1-OAS
#  - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
#merge_method: model_stock
#base_model: G:/7B/L3-8B-Stheno-v3.2
#dtype: float32

# Passthrough merge: the output model is the concatenation of the layer
# slices below, in order. Each slice's "scale" damps o_proj, down_proj,
# and (via the bare "value" entry) all remaining weights of that slice.
slices:
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [0, 14]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [8, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [12, 24]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  # Repeated Stheno layers 14-28 at reduced scale (duplicated ranges are
  # intentional in passthrough stacking).
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [14, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 0.8
            - filter: down_proj
              value: 0.8
            - value: 0.8
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [20, 25]
        parameters:
          scale:
            - filter: o_proj
              value: 0.8
            - filter: down_proj
              value: 0.8
            - value: 0.8
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [25, 27]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6
            - filter: down_proj
              value: 0.6
            - value: 1
  - sources:
      - model: G:/7B/L3-8B-Stheno-v3.2
        layer_range: [27, 28]
        parameters:
          scale:
            - filter: o_proj
              value: 0.9
            - filter: down_proj
              value: 0.9
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [20, 25]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [25, 27]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6
            - filter: down_proj
              value: 0.6
            - value: 1
  - sources:
      - model: G:/7B/L3-Lumimaid-8B-v0.1-OAS
        layer_range: [27, 31]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [24, 31]
        parameters:
          scale:
            - filter: o_proj
              value: 1
            - filter: down_proj
              value: 1
            - value: 1
  # Final layer (31) of Blackroot duplicated five times with a ramp of
  # increasing scale (~0.33 -> ~0.78).
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.3333333333333
            - filter: down_proj
              value: 0.3333333333333
            - value: 0.3333333333333
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.4444444444444
            - filter: down_proj
              value: 0.4444444444444
            - value: 0.4444444444444
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.5555555555555
            - filter: down_proj
              value: 0.5555555555555
            - value: 0.5555555555555
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.6666666666666
            - filter: down_proj
              value: 0.6666666666666
            - value: 0.6666666666666
  - sources:
      - model: G:/7B/L3-Jamet-8B-MK.V-Blackroot
        layer_range: [31, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 0.7777777777777
            - filter: down_proj
              value: 0.7777777777777
            # NOTE(review): 0.888... here vs 0.777... on the filters above
            # is asymmetric — presumably intentional; confirm with author.
            - value: 0.8888888888888
merge_method: passthrough
dtype: float16