Fixed cache and added prefill ability.
Browse files
- README.md +1 -1
- generation_config.json +1 -2
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -13,4 +13,4 @@ AWQ of the DeepSeek V3 chat model.
|
|
13 |
|
14 |
This quant modified some of the model code to fix the overflow issue when using float16.
|
15 |
|
16 |
-
Tested on vLLM with 8x H100, inference speed 5 tokens/s with batch size 1 and short
|
|
|
13 |
|
14 |
This quant modified some of the model code to fix the overflow issue when using float16.
|
15 |
|
16 |
+
Tested on vLLM with 8x H100, inference speed 5 tokens/s with batch size 1 and short prompts.
|
generation_config.json
CHANGED
@@ -3,6 +3,5 @@
|
|
3 |
"bos_token_id": 0,
|
4 |
"do_sample": true,
|
5 |
"eos_token_id": 1,
|
6 |
-
"transformers_version": "4.48.0.dev0",
|
7 |
-
"use_cache": false
|
8 |
}
|
|
|
3 |
"bos_token_id": 0,
|
4 |
"do_sample": true,
|
5 |
"eos_token_id": 1,
|
6 |
+
"transformers_version": "4.48.0.dev0"
|
|
|
7 |
}
|
tokenizer_config.json
CHANGED
@@ -6549,7 +6549,7 @@
|
|
6549 |
}
|
6550 |
},
|
6551 |
"bos_token": "<|begin▁of▁sentence|>",
|
6552 |
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content']
|
6553 |
"clean_up_tokenization_spaces": false,
|
6554 |
"eos_token": "<|end▁of▁sentence|>",
|
6555 |
"extra_special_tokens": {},
|
|
|
6549 |
}
|
6550 |
},
|
6551 |
"bos_token": "<|begin▁of▁sentence|>",
|
6552 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_assistant=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content']}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content']}}{%- endif %}{%- if not loop.last -%}{{'<|end▁of▁sentence|>'}}{%- else -%}{% set ns.is_last_assistant = true %}{%- endif -%}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool and not ns.is_last_assistant %}{{'<|Assistant|>'}}{% endif %}",
|
6553 |
"clean_up_tokenization_spaces": false,
|
6554 |
"eos_token": "<|end▁of▁sentence|>",
|
6555 |
"extra_special_tokens": {},
|