File size: 1,205 Bytes
93e4a8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
base_model: Qwen/Qwen2.5-1.5B-Instruct
gate_mode: hidden
dtype: bfloat16
experts_per_token: 2
experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
    positive_prompts:
      - "chat assistant"
      - "chat"
      - "assistant"
  - source_model: Qwen/Qwen2.5-1.5B
    positive_prompts:
      - "writing"
      - "text writing"
      - "text editing"
      - "text analysis"
      - "text enhancing"
  - source_model: Qwen/Qwen2.5-Math-1.5B-Instruct
    positive_prompts:
      - "math"
      - "math expert"
      - "calculating"
      - "math problem solving"
      - "logic"
  - source_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
    positive_prompts:
      - "coding"
      - "coder"
      - "Python coder"
      - "Java coder"
      - "JS coder"
      - "HTML/CSS coder"
      - "code refactor"
      - "code review"
      - "code enhancing"
      - "rewrite code"
      - "optimize code"

shared_experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
    positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
      - "chat assistant"
    # (optional, but recommended:)
    residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model