File size: 5,521 Bytes
f0856eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

# Device to run on: either "cpu" or "cuda".
# NOTE: "cuda" requires enough VRAM to load 3 FP16 models (~45 GB for Mistral).
# NOTE 2: The (much slower) "cpu" mode still requires CUDA capability, but only
# enough VRAM to load a model once (~15 GB for Mistral).
device: "cpu"
random_seed: 42 # Random seed to use

# Model and output locations.
directories:
  model_path1: "../jondurbin_bagel-dpo-34b-v0.2" # Path to the base model. Must be a local copy.
  # Directory of models to scan; IGNORED if models_to_merge has entries in it.
  # model_directory: "../2xNous-Capybara-34B/"
  output_directory: "./mm-output" # Output directory of the merged model

# Models to use as merge candidates, written in HF syntax: each entry can be
# either a local directory or a repo.
# When this list has entries, it overrides model_directory.
models_to_merge:
  - "../NousResearch_Nous-Capybara-34B"
  - "../NousResearch_Nous-Hermes-2-Yi-34B"
  - "../SUSTech_SUS-Chat-34B"

# Merge ratios used for testing each layer's potential for improvement.
# This setting has a huge impact on total running time.
merge_ratios: [0.2, 0.4, 0.6, 0.8]

# Merge method. Choose from the following; defaults to "lerp".
#   "lerp"   - Linear interpolation.
#   "slerp"  - Spherical linear interpolation.
#   "slice"  - Highly experimental. The tensor weights shift from one model to
#              another. [Model 1 > 10% blend > Model 2]
#   "cyclic" - Highly experimental. Ignores merge ratios as these are
#              predefined. [Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1]
merge_method: "slerp"

# If set to true, the lm_head and embed_token tensors (located outside the
# layers) will also be optimized.
# Models that have a different vocab size from model1 will skip this phase
# automatically, as it tends to cause model stability issues.
merge_headers: true

# Strategies for deciding whether a merge is accepted:
#   "cumulative"  - Default strategy. If there's a chance of reducing the
#                   combined probability, accept the merge.
#   "all_phrases" - Only accept the merge if all phrases show an improvement.
#                   (Warning: This rarely happens)
#   "quantitive"  - Ignores probabilities completely. Only looks at how many
#                   phrases show an improvement, as defined by the threshold below.
strategy: "cumulative"
# Threshold is currently only used by the "quantitive" strategy. If 0.6, at
# least 60% of the number of phrases must show an improvement.
strategy_threshold: 0.6

# Whether to automatically balance the weights so that all phrases are of
# equal importance to the "cumulative" strategy.
# If set to true, the weight value of each phrase is ignored.
auto_weights: false

# Phrases to measure. Each entry has:
#   phrase   - what to measure
#   weight   - multiplication factor
#   contexts - preceding contexts (the text that comes right before the phrase)
# NOTE(review): "bond" is listed twice below with different contexts; confirm
# the consumer treats repeated phrases as intended.
bad_phrases:
  - phrase: "anticipation"
    weight: 12
    contexts: ["Her body quivers with ", "The atmosphere is thick with "]
  - phrase: "unwavering"
    weight: 12
    contexts: ["Filled with an "]
  - phrase: "determination"
    weight: 12
    contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "]
  - phrase: "whisper"
    weight: 12
    contexts: ["Her voice barely above a "]
  - phrase: "spine"
    weight: 12
    contexts: ["shivers down her "]
  - phrase: "sends shivers"
    weight: 12
    contexts: ["The thrill of the act "]
  - phrase: "ministrations"
    weight: 12
    contexts: ["She moans and twitches at your "]
  - phrase: "legs"
    weight: 12
    contexts: ["wraps her "]
  - phrase: "imposing figure"
    weight: 12
    contexts: ["He had an "]
  - phrase: "shared challenges"
    weight: 12
    contexts: ["Their bond strengthened through "]
  - phrase: "bond"
    weight: 12
    contexts: ["forged a ", "an unspoken "]
  - phrase: "enhance our experience"
    weight: 12
    contexts: ["I'm excited to see how "]
  - phrase: "sense of vulnerability"
    weight: 12
    contexts: ["create a "]
  - phrase: "dimensions of intimacy"
    weight: 12
    contexts: ["explore new "]
  - phrase: "deepening our connection"
    weight: 12
    contexts: ["while "]
  - phrase: "shared experiences"
    weight: 12
    contexts: ["through "]
  - phrase: "societal expectations"
    weight: 12
    contexts: ["that transcend "]
  - phrase: "conventional boundaries"
    weight: 12
    contexts: ["that defy ", "and defy "]
  - phrase: "open communication"
    weight: 12
    contexts: ["an environment "]
  - phrase: "emotional vulnerability"
    weight: 12
    contexts: ["an environment "]
  - phrase: "heightens our connection"
    weight: 12
    contexts: ["touch and the anticipation "]
  - phrase: "sensations you're creating"
    weight: 12
    contexts: ["I'm enjoying "]
  - phrase: "is truly arousing"
    weight: 12
    contexts: ["attention to detail ", "way you explore my body "]
  - phrase: "challenge presented"
    weight: 12
    contexts: ["my resolve unwavering despite "]
  - phrase: "humble vessel"
    weight: 12
    contexts: ["surrendering to the exquisite torment "]
  - phrase: "bond"
    weight: 12
    contexts: ["cherishing the unique ", "special "]
  - phrase: "grows stronger with each passing day"
    weight: 12
    contexts: ["bond "]
  - phrase: "that cannot be broken by time or circumstance"
    weight: 12
    contexts: ["bond "]
  - phrase: "becomes unbreakable, eternal"
    weight: 12
    contexts: ["bond "]
  - phrase: "grew stronger with each passing"
    weight: 12
    contexts: ["bond "]

# Note - Example of a complex phrase: a full sentence is measured, and its
# single context is a long question prompt.
good_phrases:
  - phrase: "The apple is in the bedroom"
    weight: 1
    contexts:
      - "Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "