# Engine-build configuration for the whisper-large-v2 multi-hans-zh model.
INFERENCE_PRECISION=float16                       # precision for the BERT attention plugin
MAX_BEAM_WIDTH=4                                  # max beam-search width baked into the engine
MAX_BATCH_SIZE=64                                 # max batch size baked into the engine
checkpoint_dir=whisper_multi_zh_tllm_checkpoint   # TRT-LLM checkpoint dir (input)
output_dir=whisper_multi_zh                       # built engine output dir
# Download the fine-tuned model https://huggingface.co/yuekai/icefall_asr_multi-hans-zh_whisper/blob/main/v1.1/whisper-large-v2-multi-hans-zh-epoch-3-avg-10.pt
# then convert it to a TRT-LLM checkpoint:
# python3 convert_checkpoint.py \
#   --output_dir whisper_multi_zh_tllm_checkpoint \
#   --model_name large-v2
# Build the Whisper encoder engine with TensorRT-LLM.
# max_input_len/max_seq_len = 3000 matches Whisper's 30 s mel-spectrogram frames.
# For debugging, padding removal and paged KV cache can be turned off with:
#   --remove_input_padding disable --paged_kv_cache disable
trtllm-build --checkpoint_dir "${checkpoint_dir}/encoder" \
  --output_dir "${output_dir}/encoder" \
  --moe_plugin disable \
  --enable_xqa disable \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --gemm_plugin disable \
  --bert_attention_plugin "${INFERENCE_PRECISION}" \
  --max_input_len 3000 \
  --max_seq_len 3000