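#!/usr/bin/env bash
# Build TensorRT-LLM encoder/decoder engines for a multilingual-Chinese
# (multi_zh) distil-whisper model, with an optional CER evaluation on
# WenetSpeech at the end. Assumes the TensorRT-LLM whisper example's
# convert_checkpoint.py and run.py are available in the working directory.
set -euo pipefail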
# Engine build configuration.
INFERENCE_PRECISION=float16   # dtype used by the attention/GEMM plugins
MAX_BEAM_WIDTH=4              # largest beam width the decoder engine supports
MAX_BATCH_SIZE=64             # largest batch size baked into both engines

checkpoint_dir=multi_zh_distil_tllm_checkpoint_pos_emb_true
output_dir=distil_whisper_multi_zh_remove_padding
# One-time: convert the distil-large-v2 checkpoint into a TensorRT-LLM
# checkpoint (fp16).
# python3 convert_checkpoint.py \
#     --output_dir $checkpoint_dir \
#     --model_name distil-large-v2

# Alternative: int8 weight-only quantized checkpoint.
# checkpoint_dir=multi_zh_distil_tllm_int8_checkpoint_pos_emb_true
# output_dir=distil_whisper_multi_zh_int8_remove_padding
# python3 convert_checkpoint.py --use_weight_only \
#     --weight_only_precision int8 \
#     --output_dir $checkpoint_dir \
#     --model_name distil-large-v2
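# Guard (a minimal sketch, not in the original flow): fail fast if the
# converted checkpoint is missing.
if [ ! -d "${checkpoint_dir}" ]; then
    echo "Checkpoint dir '${checkpoint_dir}' not found; run convert_checkpoint.py first." >&2
    exit 1
fi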
# Build the encoder engine. Whisper takes 30 s of audio as 3000 mel-spectrogram
# frames, hence max_input_len / max_seq_len of 3000.
trtllm-build --checkpoint_dir ${checkpoint_dir}/encoder \
    --output_dir ${output_dir}/encoder \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_batch_size ${MAX_BATCH_SIZE} \
    --gemm_plugin disable \
    --bert_attention_plugin ${INFERENCE_PRECISION} \
    --max_input_len 3000 \
    --max_seq_len 3000
# Build the decoder engine. max_input_len bounds the decoder prompt (the
# special start/language/task tokens); max_seq_len bounds prompt plus
# generated tokens; max_encoder_input_len matches the encoder's sequence
# length for cross-attention.
trtllm-build --checkpoint_dir ${checkpoint_dir}/decoder \
    --output_dir ${output_dir}/decoder \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_beam_width ${MAX_BEAM_WIDTH} \
    --max_batch_size ${MAX_BATCH_SIZE} \
    --max_seq_len 114 \
    --max_input_len 14 \
    --max_encoder_input_len 3000 \
    --gemm_plugin ${INFERENCE_PRECISION} \
    --bert_attention_plugin ${INFERENCE_PRECISION} \
    --gpt_attention_plugin ${INFERENCE_PRECISION}
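# Post-build check (a sketch; assumes trtllm-build's default rankN.engine
# naming): confirm both engines were produced before evaluation.
for component in encoder decoder; do
    ls -lh "${output_dir}/${component}"/*.engine \
        || echo "WARN: no engine found under ${output_dir}/${component}" >&2
done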
# Optional: evaluate the engines on WenetSpeech TEST_MEETING and report CER.
# batch_size=32
# padding_strategy=zero
# dataset=wenet-e2e/wenetspeech
# dataset_name=TEST_MEETING
# python3 run.py --engine_dir $output_dir \
#     --enable_warmup \
#     --dataset $dataset \
#     --dataset_name $dataset_name \
#     --dataset_split test \
#     --compute_cer \
#     --text_prefix "<|startoftranscript|><|zh|><|transcribe|><|notimestamps|>" \
#     --name aishell_${dataset_name}_${output_dir}_padding_${padding_strategy}_batch_${batch_size}_cppsession \
#     --batch_size $batch_size --padding_strategy $padding_strategy
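# To run the evaluation, uncomment the block above; other WenetSpeech splits
# (e.g. TEST_NET) can be selected via dataset_name, and results are labeled
# by the --name argument.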