Update README.md
Browse files
README.md
CHANGED
@@ -21,50 +21,28 @@ QwenMoEAriel is a Mixture of Experts (MoE) made with the following models using
|
|
21 |
|
22 |
## 🧩 Configuration
|
23 |
base_model : Qwen/Qwen2-1.5B
|
24 |
-
|
25 |
architecture: qwen
|
26 |
-
|
27 |
experts:
|
28 |
-
|
29 |
- source_model: Qwen/Qwen2-1.5B
|
30 |
-
|
31 |
positive_prompts:
|
32 |
-
|
33 |
- "chat"
|
34 |
-
|
35 |
- "assistant"
|
36 |
-
|
37 |
- "tell me"
|
38 |
-
|
39 |
- "explain"
|
40 |
-
|
41 |
- "I want"
|
42 |
-
|
43 |
- source_model: Replete-AI/Replete-Coder-Qwen2-1.5b
|
44 |
-
|
45 |
positive_prompts:
|
46 |
-
|
47 |
- "code"
|
48 |
-
|
49 |
- "python"
|
50 |
-
|
51 |
- "javascript"
|
52 |
-
|
53 |
- "programming"
|
54 |
-
|
55 |
- "algorithm"
|
56 |
-
|
57 |
shared_experts:
|
58 |
-
|
59 |
- source_model: Qwen/Qwen2-1.5B
|
60 |
-
|
61 |
positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
|
62 |
-
|
63 |
-
- "chat"
|
64 |
-
|
65 |
# (optional, but recommended:)
|
66 |
-
|
67 |
-
residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model
|
68 |
|
69 |
## 💻 Usage
|
70 |
|
|
|
21 |
|
22 |
## 🧩 Configuration
|
23 |
base_model : Qwen/Qwen2-1.5B
|
|
|
24 |
architecture: qwen
|
|
|
25 |
experts:
|
|
|
26 |
- source_model: Qwen/Qwen2-1.5B
|
|
|
27 |
positive_prompts:
|
|
|
28 |
- "chat"
|
|
|
29 |
- "assistant"
|
|
|
30 |
- "tell me"
|
|
|
31 |
- "explain"
|
|
|
32 |
- "I want"
|
|
|
33 |
- source_model: Replete-AI/Replete-Coder-Qwen2-1.5b
|
|
|
34 |
positive_prompts:
|
|
|
35 |
- "code"
|
|
|
36 |
- "python"
|
|
|
37 |
- "javascript"
|
|
|
38 |
- "programming"
|
|
|
39 |
- "algorithm"
|
|
|
40 |
shared_experts:
|
|
|
41 |
- source_model: Qwen/Qwen2-1.5B
|
|
|
42 |
positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
|
43 |
+
- "chat"
|
|
|
|
|
44 |
# (optional, but recommended:)
|
45 |
+
residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model
|
|
|
46 |
|
47 |
## 💻 Usage
|
48 |
|