base_model: Qwen/Qwen2.5-1.5B-Instruct
gate_mode: hidden
dtype: bfloat16
experts_per_token: 2
experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
    positive_prompts:
      - "chat assistant"
      - "chat"
      - "assistant"
  - source_model: Qwen/Qwen2.5-1.5B
    positive_prompts:
      - "writing"
      - "text writing"
      - "text editing"
      - "text analysis"
      - "text enhancing"
  - source_model: Qwen/Qwen2.5-Math-1.5B-Instruct
    positive_prompts:
      - "math"
      - "math expert"
      - "calculating"
      - "math problem solving"
      - "logic"
  - source_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
    positive_prompts:
      - "coding"
      - "coder"
      - "Python coder"
      - "Java coder"
      - "JS coder"
      - "HTML/CSS coder"
      - "code refactoring"
      - "code review"
      - "code enhancing"
      - "rewrite code"
      - "optimize code"
shared_experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
    positive_prompts:  # required by the Qwen MoE architecture when gate_mode is "hidden"; not allowed otherwise
      - "chat assistant"
    # optional, but recommended:
    residual_scale: 0.1  # down-weight the shared expert's output to avoid overcooking the model
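
To turn this config into an actual model, save it to disk and pass it to mergekit's `mergekit-moe` entry point, then load the result like any other causal LM. Below is a minimal sketch, assuming `mergekit` and `transformers` are installed and enough memory is available for four 1.5B experts; the file names `moe-config.yaml` and `./qwen2.5-moe` are placeholders, not part of the config above.

```python
# Sketch: run the mergekit-moe merge and smoke-test the resulting model.
# Assumes `pip install mergekit transformers`; paths are placeholders.
import subprocess

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

CONFIG = "moe-config.yaml"   # the YAML above, saved to disk
OUT_DIR = "./qwen2.5-moe"    # where mergekit writes the merged model

# With gate_mode: hidden, mergekit-moe embeds each expert's
# positive_prompts using the base model's hidden states and uses those
# vectors to initialize the per-layer routers.
subprocess.run(["mergekit-moe", CONFIG, OUT_DIR], check=True)

# Load the merged model. With experts_per_token: 2, each token is
# routed to the two experts whose prompt vectors best match it.
tokenizer = AutoTokenizer.from_pretrained(OUT_DIR)
model = AutoModelForCausalLM.from_pretrained(OUT_DIR, torch_dtype=torch.bfloat16)

messages = [{"role": "user", "content": "Refactor this: def f(x): return x*2"}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
output = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

The output directory holds a standard Qwen MoE checkpoint, so it should load with stock `transformers` and no `trust_remote_code`.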