update README.md
Browse files
README.md
CHANGED
@@ -16,4 +16,51 @@ This is a merge of pre-trained language models created using [mergekit](https://
|
|
16 |
The following models were included in the merge:
|
17 |
* [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)
|
18 |
* [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf)
|
19 |
-
* [meta-math/MetaMath-Mistral-7B](https://huggingface.co/meta-math/MetaMath-Mistral-7B)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
The following models were included in the merge:
|
17 |
* [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)
|
18 |
* [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf)
|
19 |
+
* [meta-math/MetaMath-Mistral-7B](https://huggingface.co/meta-math/MetaMath-Mistral-7B)
|
20 |
+
|
21 |
+
### Configuration
|
22 |
+
|
23 |
+
The following YAML configuration was used to produce this model:
|
24 |
+
|
25 |
+
```yaml
|
26 |
+
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
27 |
+
gate_mode: hidden # one of "hidden", "cheap_embed", or "random"
|
28 |
+
dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
|
29 |
+
## (optional)
|
30 |
+
# experts_per_token: 2
|
31 |
+
experts:
|
32 |
+
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
33 |
+
positive_prompts:
|
34 |
+
- "chat"
|
35 |
+
- "assistant"
|
36 |
+
- "tell me"
|
37 |
+
- "explain"
|
38 |
+
- "I want"
|
39 |
+
## (optional)
|
40 |
+
# negative_prompts:
|
41 |
+
# - "This is a prompt expert_model_1 should not be used for"
|
42 |
+
- source_model: codellama/CodeLlama-7b-Instruct-hf
|
43 |
+
positive_prompts:
|
44 |
+
- "code"
|
45 |
+
- "python"
|
46 |
+
- "javascript"
|
47 |
+
- "programming"
|
48 |
+
- "algorithm"
|
49 |
+
- "C#"
|
50 |
+
- "C++"
|
51 |
+
- "debug"
|
52 |
+
- "runtime"
|
53 |
+
- "html"
|
54 |
+
- "command"
|
55 |
+
- "nodejs"
|
56 |
+
- source_model: meta-math/MetaMath-Mistral-7B
|
57 |
+
positive_prompts:
|
58 |
+
- "reason"
|
59 |
+
- "math"
|
60 |
+
- "mathematics"
|
61 |
+
- "solve"
|
62 |
+
- "count"
|
63 |
+
- "calculate"
|
64 |
+
- "arithmetic"
|
65 |
+
- "algebra"
|
66 |
+
```
|