# File size: 5,521 Bytes
# f0856eb
# Device selection: either "cpu" or "cuda".
# NOTE: "cuda" requires enough VRAM to load 3 FP16 models (~45 GB for Mistral).
# NOTE 2: The (much slower) "cpu" mode still requires CUDA capability, but only
# enough VRAM to load a model once (~15 GB for Mistral).
device: "cpu"
# Random seed to use.
random_seed: 42
# Filesystem locations for the base model and the merged output.
# NOTE(review): model_path1 / output_directory are assumed to be children of
# `directories` (a bare `directories:` would otherwise be null) - confirm
# against the config loader.
directories:
  # Path to the base model. Must be a local copy.
  model_path1: "../jondurbin_bagel-dpo-34b-v0.2"
  # Directory of models to scan, IGNORED if models_to_merge has entries in it.
  # model_directory: "../2xNous-Capybara-34B/"
  # Output directory of the merged model.
  output_directory: "./mm-output"
# Merge candidates, written in HF syntax: each entry can be either a local
# directory or a repo.
# Overrides model_directory if used.
models_to_merge:
  - "../NousResearch_Nous-Capybara-34B"
  - "../NousResearch_Nous-Hermes-2-Yi-34B"
  - "../SUSTech_SUS-Chat-34B"
# Merge ratios used for testing each layer's potential for improvement.
# This list has a huge impact on total running time.
merge_ratios: [0.2, 0.4, 0.6, 0.8]
# Merge method. Choose from the following; defaults to "lerp".
#   "lerp"   - Linear interpolation
#   "slerp"  - Spherical linear interpolation
#   "slice"  - Highly experimental. The tensor weights shift from one model to another. [Model 1 > 10% blend > Model 2]
#   "cyclic" - Highly experimental. Ignores merge ratios as these are predefined. [Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1]
merge_method: "slerp"
# If set to true, the lm_head and embed_token tensors (located outside the
# layers) will also be optimized.
# Models that have a different vocab size from model1 will skip this phase
# automatically, as it tends to cause model stability issues.
merge_headers: true
# Strategy used to decide whether a merge is accepted:
#   "cumulative"  - Default strategy. If there's a chance of reducing the combined probability, accept the merge.
#   "all_phrases" - Only accept the merge if all phrases show an improvement. (Warning: This rarely happens)
#   "quantitive"  - Ignores probabilities completely. Only looks at how many phrases show an improvement, as defined by the threshold below.
strategy: "cumulative"
# Threshold is currently only used by the "quantitive" strategy.
# If 0.6, at least 60% of the phrases must show an improvement.
strategy_threshold: 0.6
# Whether or not to automatically balance the weights so that all phrases are
# of equal importance to the "cumulative" strategy.
# The weight value of each phrase is ignored if set to true.
auto_weights: false
# Each entry: phrase = what to measure, weight = multiplication factor,
# contexts = preceding contexts (the text placed directly before the phrase).
# Trailing spaces inside the context strings are significant; keep them quoted.
bad_phrases:
  - phrase: "anticipation"
    weight: 12
    contexts: ["Her body quivers with ", "The atmosphere is thick with "]
  - phrase: "unwavering"
    weight: 12
    contexts: ["Filled with an "]
  - phrase: "determination"
    weight: 12
    contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "]
  - phrase: "whisper"
    weight: 12
    contexts: ["Her voice barely above a "]
  - phrase: "spine"
    weight: 12
    contexts: ["shivers down her "]
  - phrase: "sends shivers"
    weight: 12
    contexts: ["The thrill of the act "]
  - phrase: "ministrations"
    weight: 12
    contexts: ["She moans and twitches at your "]
  - phrase: "legs"
    weight: 12
    contexts: ["wraps her "]
  - phrase: "imposing figure"
    weight: 12
    contexts: ["He had an "]
  - phrase: "shared challenges"
    weight: 12
    contexts: ["Their bond strengthened through "]
  - phrase: "bond"
    weight: 12
    contexts: ["forged a ", "an unspoken "]
  - phrase: "enhance our experience"
    weight: 12
    contexts: ["I'm excited to see how "]
  - phrase: "sense of vulnerability"
    weight: 12
    contexts: ["create a "]
  - phrase: "dimensions of intimacy"
    weight: 12
    contexts: ["explore new "]
  - phrase: "deepening our connection"
    weight: 12
    contexts: ["while "]
  - phrase: "shared experiences"
    weight: 12
    contexts: ["through "]
  - phrase: "societal expectations"
    weight: 12
    contexts: ["that transcend "]
  - phrase: "conventional boundaries"
    weight: 12
    contexts: ["that defy ", "and defy "]
  - phrase: "open communication"
    weight: 12
    contexts: ["an environment "]
  - phrase: "emotional vulnerability"
    weight: 12
    contexts: ["an environment "]
  - phrase: "heightens our connection"
    weight: 12
    contexts: ["touch and the anticipation "]
  - phrase: "sensations you're creating"
    weight: 12
    contexts: ["I'm enjoying "]
  - phrase: "is truly arousing"
    weight: 12
    contexts: ["attention to detail ", "way you explore my body "]
  - phrase: "challenge presented"
    weight: 12
    contexts: ["my resolve unwavering despite "]
  - phrase: "humble vessel"
    weight: 12
    contexts: ["surrendering to the exquisite torment "]
  # NOTE(review): "bond" is listed a second time here with different contexts -
  # confirm this is intentional rather than an entry that should be merged.
  - phrase: "bond"
    weight: 12
    contexts: ["cherishing the unique ", "special "]
  - phrase: "grows stronger with each passing day"
    weight: 12
    contexts: ["bond "]
  - phrase: "that cannot be broken by time or circumstance"
    weight: 12
    contexts: ["bond "]
  - phrase: "becomes unbreakable, eternal"
    weight: 12
    contexts: ["bond "]
  - phrase: "grew stronger with each passing"
    weight: 12
    contexts: ["bond "]
# Entries use the same keys as bad_phrases (phrase, weight, contexts).
# Note - Example of a complex phrase: a full-sentence answer measured after a
# long question context.
good_phrases:
  - phrase: "The apple is in the bedroom"
    weight: 1
    contexts: ["Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "]