sometimesanotion committed • Commit cb5c8bb • Parent: 031a736

Update README.md

README.md CHANGED
@@ -162,163 +162,6 @@ For GGUFs, [mradermacher/Lamarck-14B-v0.3-i1-GGUF](https://huggingface.co/mrader
 
 - **[sometimesanotion/lamarck-14b-prose-model_stock](https://huggingface.co/sometimesanotion/lamarck-14b-prose-model_stock)** - This brings in a little influence from [EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2](https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2), [oxyapi/oxy-1-small](https://huggingface.co/oxyapi/oxy-1-small), and [allura-org/TQ2.5-14B-Sugarquill-v1](https://huggingface.co/allura-org/TQ2.5-14B-Sugarquill-v1).
 
-**Note on abliteration:** This author believes that adjacent services, not language models themselves, are where guardrails are best placed. Efforts to de-censor Lamarck will resume after the model has been further studied.
-
-### Configuration
-
-The following YAML configuration was used to produce this model:
-
-```yaml
-name: lamarck-14b-reason-della     # This contributes the knowledge and reasoning pool, later to be merged
-merge_method: della                # with the dominant instruction-following model
-base_model: arcee-ai/Virtuoso-Small
-tokenizer_source: arcee-ai/Virtuoso-Small
-parameters:
-  int8_mask: false
-  normalize: true
-  rescale: false
-  density: 0.30
-  weight: 0.50
-  epsilon: 0.08
-  lambda: 1.00
-models:
-  - model: CultriX/SeQwence-14B-EvolMerge
-    parameters:
-      density: 0.70
-      weight: 0.90
-  - model: sometimesanotion/lamarck-14b-reason-model_stock
-    parameters:
-      density: 0.90
-      weight: 0.60
-  - model: CultriX/Qwen2.5-14B-Wernicke
-    parameters:
-      density: 0.20
-      weight: 0.30
-dtype: bfloat16
-out_dtype: bfloat16
----
-name: lamarck-14b-prose-della      # This contributes the prose, later to be merged
-merge_method: della                # with the dominant instruction-following model
-base_model: arcee-ai/Virtuoso-Small
-tokenizer_source: arcee-ai/Virtuoso-Small
-parameters:
-  int8_mask: false
-  normalize: true
-  rescale: false
-  density: 0.30
-  weight: 0.50
-  epsilon: 0.08
-  lambda: 0.95
-models:
-  - model: sthenno-com/miscii-14b-1028
-    parameters:
-      density: 0.40
-      weight: 0.90
-  - model: sometimesanotion/lamarck-14b-prose-model_stock
-    parameters:
-      density: 0.60
-      weight: 0.70
-  - model: underwoods/medius-erebus-magnum-14b
-dtype: bfloat16
-out_dtype: bfloat16
----
-name: lamarck-14b-converge-della   # This is the strongest control point to quickly
-merge_method: della                # re-balance reasoning vs. prose
-base_model: arcee-ai/Virtuoso-Small
-tokenizer_source: arcee-ai/Virtuoso-Small
-parameters:
-  int8_mask: false
-  normalize: true
-  rescale: false
-  density: 0.30
-  weight: 0.50
-  epsilon: 0.08
-  lambda: 1.00
-models:
-  - model: sometimesanotion/lamarck-14b-reason-della
-    parameters:
-      density: 0.80
-      weight: 1.00
-  - model: arcee-ai/Virtuoso-Small
-    parameters:
-      density: 0.40
-      weight: 0.50
-  - model: sometimesanotion/lamarck-14b-prose-della
-    parameters:
-      density: 0.10
-      weight: 0.40
-dtype: bfloat16
-out_dtype: bfloat16
----
-name: lamarck-14b-converge          # Virtuoso has good capabilities all-around; it is 100% of the first
-merge_method: slerp                 # two layers, and blends into the reasoning+prose convergence
-base_model: arcee-ai/Virtuoso-Small # for some interesting boosts
-tokenizer_source: base
-parameters:
-  t: [ 0.00, 0.60, 0.80, 0.80, 0.80, 0.70, 0.40 ]
-slices:
-  - sources:
-      - layer_range: [ 0, 2 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 0, 2 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.00, 0.00 ]
-  - sources:
-      - layer_range: [ 2, 8 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 2, 8 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.00, 0.60 ]
-  - sources:
-      - layer_range: [ 8, 16 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 8, 16 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.60, 0.70 ]
-  - sources:
-      - layer_range: [ 16, 24 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 16, 24 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.70, 0.70 ]
-  - sources:
-      - layer_range: [ 24, 32 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 24, 32 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.70, 0.70 ]
-  - sources:
-      - layer_range: [ 32, 40 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 32, 40 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.70, 0.60 ]
-  - sources:
-      - layer_range: [ 40, 48 ]
-        model: arcee-ai/Virtuoso-Small
-      - layer_range: [ 40, 48 ]
-        model: merges/lamarck-14b-converge-della
-    t: [ 0.60, 0.40 ]
-dtype: bfloat16
-out_dtype: bfloat16
----
-name: lamarck-14b-finalize
-merge_method: ties
-base_model: Qwen/Qwen2.5-14B
-tokenizer_source: Qwen/Qwen2.5-14B-Instruct
-parameters:
-  int8_mask: false
-  normalize: true
-  rescale: false
-  density: 1.00
-  weight: 1.00
-models:
-  - model: merges/lamarck-14b-converge
-dtype: bfloat16
-out_dtype: bfloat16
----
-
-```
 
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_sometimesanotion__Lamarck-14B-v0.3)
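The `della` stages in the removed configuration are steered by four knobs: each source model's delta from the base is stochastically pruned with keep-probabilities that rise with parameter magnitude (`density` sets the average, `epsilon` the spread of the schedule), survivors are rescaled so the expected delta is unchanged, the per-model `weight` values scale the deltas summed back onto the base, and `lambda` scales the final sum. The sketch below is a toy illustration of that drop-and-rescale idea only; it is not mergekit's implementation, the exact probability schedule is an assumption, and DELLA's sign-election step is omitted.

```python
import torch

def della_prune(delta: torch.Tensor, density: float, epsilon: float) -> torch.Tensor:
    """Toy magnitude-ranked stochastic pruning of a task delta (DELLA-style)."""
    flat = delta.flatten()
    n = flat.numel()
    ranks = flat.abs().argsort().argsort().float()  # 0 = smallest magnitude
    # Keep-probability grows with magnitude rank, averaging `density` and
    # spanning +/- epsilon/2 around it (assumed schedule, for illustration).
    p_keep = (density - epsilon / 2 + epsilon * ranks / max(n - 1, 1)).clamp(0.0, 1.0)
    mask = torch.bernoulli(p_keep)
    return (flat * mask / p_keep.clamp_min(1e-8)).reshape(delta.shape)  # rescale survivors

def della_merge(base, models, densities, weights, epsilon, lam):
    """Weighted sum of pruned deltas onto the base, as in lamarck-14b-reason-della."""
    deltas = [della_prune(m - base, d, epsilon) for m, d in zip(models, densities)]
    return base + lam * sum(w * dl for w, dl in zip(weights, deltas))
```

With the first document's values, `CultriX/SeQwence-14B-EvolMerge` enters at density 0.70 / weight 0.90, so most of its delta survives pruning and dominates the sum, while `CultriX/Qwen2.5-14B-Wernicke` at density 0.20 / weight 0.30 contributes only a sparse, lightly weighted correction.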
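The `lamarck-14b-converge` document then slerps Virtuoso-Small against the della convergence with a layer-wise `t` gradient: t = 0 keeps Virtuoso outright in the first two layers, the della merge contributes most heavily mid-stack (t around 0.7 to 0.8), and its influence tapers to 0.4 at the top. Here is a minimal sketch of spherical linear interpolation between two weight tensors, under the common treat-each-tensor-as-a-vector formulation (again an illustration, not mergekit's code):

```python
import torch

def slerp(a: torch.Tensor, b: torch.Tensor, t: float, eps: float = 1e-8) -> torch.Tensor:
    """Spherical interpolation: t=0 returns a (Virtuoso-Small), t=1 returns b."""
    va, vb = a.flatten(), b.flatten()
    na = va / (va.norm() + eps)
    nb = vb / (vb.norm() + eps)
    cos = torch.dot(na, nb).clamp(-1 + 1e-7, 1 - 1e-7)
    omega = torch.arccos(cos)          # angle between the two weight directions
    so = torch.sin(omega)
    if so.abs() < eps:                 # near-parallel tensors: fall back to lerp
        out = (1 - t) * va + t * vb
    else:
        out = (torch.sin((1 - t) * omega) / so) * va + (torch.sin(t * omega) / so) * vb
    return out.reshape(a.shape)
```

Per the config, a layer in the `[8, 16)` slice would be blended with `t` interpolated between 0.60 and 0.70 across that range.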
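Because the configuration is multi-document YAML, later documents reference earlier outputs by path (`merges/lamarck-14b-converge-della`, `merges/lamarck-14b-converge`), so each document must be merged in order with its output written under `merges/<name>`. Below is a sketch using mergekit's Python API; the config file name is a placeholder, and stripping the `name` key before validation is an assumption about the schema:

```python
import torch
import yaml
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Hypothetical local copy of the multi-document config shown in the diff above.
with open("lamarck-14b-v0.3.yaml", "r", encoding="utf-8") as fp:
    documents = [doc for doc in yaml.safe_load_all(fp) if doc]

for doc in documents:
    name = doc.pop("name")  # stage label; assumed not part of mergekit's schema
    run_merge(
        MergeConfiguration.model_validate(doc),
        f"merges/{name}",   # later documents reference merges/<name>
        options=MergeOptions(cuda=torch.cuda.is_available(), copy_tokenizer=True),
    )
```

For a single-document config, mergekit's `mergekit-yaml` command-line entry point does the same job without the loop.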