temp: hardcode custom maxTokens
src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte
CHANGED
@@ -6,7 +6,54 @@
 	export let conversation: Conversation;
 	export let classNames = "";
 
-	$: modelMaxLength = conversation.model.tokenizerConfig.model_max_length;
+	const customMaxTokens: { [key: string]: number } = {
+		"01-ai/Yi-1.5-34B-Chat": 2048,
+		"HuggingFaceM4/idefics-9b-instruct": 2048,
+		"deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
+		"bigcode/starcoder": 8192,
+		"bigcode/starcoderplus": 8192,
+		"HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
+		"google/gemma-7b": 8192,
+		"google/gemma-1.1-7b-it": 8192,
+		"google/gemma-2b": 8192,
+		"google/gemma-1.1-2b-it": 8192,
+		"google/gemma-2-27b-it": 8192,
+		"google/gemma-2-9b-it": 4096,
+		"google/gemma-2-2b-it": 8192,
+		"tiiuae/falcon-7b": 8192,
+		"tiiuae/falcon-7b-instruct": 8192,
+		"timdettmers/guanaco-33b-merged": 2048,
+		"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+		"Qwen/Qwen2.5-72B-Instruct": 32768,
+		"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
+		"CohereForAI/c4ai-command-r-plus-08-2024": 32768,
+		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
+		"meta-llama/Llama-2-70b-chat-hf": 8192,
+		"HuggingFaceH4/zephyr-7b-alpha": 17432,
+		"HuggingFaceH4/zephyr-7b-beta": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.1": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.2": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.3": 32768,
+		"mistralai/Mistral-Nemo-Instruct-2407": 32768,
+		"meta-llama/Meta-Llama-3-8B-Instruct": 8192,
+		"mistralai/Mistral-7B-v0.1": 32768,
+		"bigcode/starcoder2-3b": 16384,
+		"bigcode/starcoder2-15b": 16384,
+		"HuggingFaceH4/starchat2-15b-v0.1": 16384,
+		"codellama/CodeLlama-7b-hf": 8192,
+		"codellama/CodeLlama-13b-hf": 8192,
+		"codellama/CodeLlama-34b-Instruct-hf": 8192,
+		"meta-llama/Llama-2-7b-chat-hf": 8192,
+		"meta-llama/Llama-2-13b-chat-hf": 8192,
+		"OpenAssistant/oasst-sft-6-llama-30b": 2048,
+		"TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
+		"HuggingFaceH4/starchat-beta": 8192,
+		"bigcode/octocoder": 8192,
+		"vwxyzjn/starcoderbase-triviaqa": 8192,
+		"lvwerra/starcoderbase-gsm8k": 8192,
+	} as const;
+
+	$: modelMaxLength = customMaxTokens[conversation.model.id] ?? conversation.model.tokenizerConfig.model_max_length;
 	$: maxTokens = Math.min(modelMaxLength ?? GENERATION_CONFIG_SETTINGS["max_tokens"].max, 64_000);
 </script>
 
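Net effect: the playground's token limit now resolves in two reactive steps. A hardcoded per-model override, when present, takes precedence over the tokenizer config's model_max_length, and whatever survives is capped at 64,000 tokens. Below is a minimal TypeScript sketch of that resolution order; the resolveMaxTokens helper and the 8192 default are illustrative stand-ins for the component's two reactive statements and GENERATION_CONFIG_SETTINGS["max_tokens"].max, not part of the commit.

// Sketch only: the commit implements this as two Svelte reactive statements ($:),
// not as a standalone function. Names below are hypothetical.
const FALLBACK_MAX = 8192; // stand-in for GENERATION_CONFIG_SETTINGS["max_tokens"].max

function resolveMaxTokens(
	modelId: string,
	tokenizerMaxLength: number | undefined,
	overrides: Record<string, number>
): number {
	// 1. A hardcoded override for the model id wins outright.
	// 2. Otherwise use the tokenizer config's model_max_length.
	const modelMaxLength = overrides[modelId] ?? tokenizerMaxLength;
	// 3. Fall back to the global default, then cap at 64,000 tokens.
	return Math.min(modelMaxLength ?? FALLBACK_MAX, 64_000);
}

// With the table above: resolveMaxTokens("google/gemma-2-9b-it", undefined, customMaxTokens) === 4096,
// while an unlisted model keeps its tokenizer limit, e.g.
// resolveMaxTokens("some/other-model", 4096, customMaxTokens) === 4096.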