# Engine-build settings for the multi-zh distil-whisper model.
INFERENCE_PRECISION=float16
MAX_BEAM_WIDTH=4
MAX_BATCH_SIZE=64
checkpoint_dir=multi_zh_distil_tllm_checkpoint_pos_emb_true
output_dir=distil_whisper_multi_zh_remove_padding
# One-time checkpoint conversion, float16 (default):
# python3 convert_checkpoint.py \
#     --output_dir $checkpoint_dir \
#     --model_name distil-large-v2
#
# Or the int8 weight-only variant:
# checkpoint_dir=multi_zh_distil_tllm_int8_checkpoint_pos_emb_true
# output_dir=distil_whisper_multi_zh_int8_remove_padding
# python3 convert_checkpoint.py --use_weight_only \
#     --weight_only_precision int8 \
#     --output_dir $checkpoint_dir --model_name distil-large-v2
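
# Build the encoder engine from the converted ${checkpoint_dir}/encoder
# sub-checkpoint. Whisper-family encoders take fixed 30 s log-mel chunks
# at 100 frames/s, hence the 3000-frame max_input_len/max_seq_len.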
trtllm-build --checkpoint_dir ${checkpoint_dir}/encoder \
    --output_dir ${output_dir}/encoder \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_batch_size ${MAX_BATCH_SIZE} \
    --gemm_plugin disable \
    --bert_attention_plugin ${INFERENCE_PRECISION} \
    --max_input_len 3000 --max_seq_len 3000
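
# Build the decoder engine from ${checkpoint_dir}/decoder. One reading
# of the limits below: max_input_len 14 bounds the decoder prompt (the
# special-token prefix), so max_seq_len 114 leaves roughly 100 generated
# tokens per utterance; max_encoder_input_len matches the encoder's
# 3000-frame sequence length for cross-attention.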
trtllm-build --checkpoint_dir ${checkpoint_dir}/decoder \
    --output_dir ${output_dir}/decoder \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_beam_width ${MAX_BEAM_WIDTH} \
    --max_batch_size ${MAX_BATCH_SIZE} \
    --max_seq_len 114 \
    --max_input_len 14 \
    --max_encoder_input_len 3000 \
    --gemm_plugin ${INFERENCE_PRECISION} \
    --bert_attention_plugin ${INFERENCE_PRECISION} \
    --gpt_attention_plugin ${INFERENCE_PRECISION}
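
# Optional smoke test before benchmarking: a minimal sketch, assuming
# the run.py from the TensorRT-LLM whisper example and a sample wav
# under assets/ (the filename here is illustrative):
# python3 run.py --name single_wav_test \
#     --engine_dir $output_dir \
#     --input_file assets/1221-135766-0002.wav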
# Optional: benchmark CER on the WenetSpeech TEST_MEETING split.
# batch_size=32
# padding_strategy=zero
# dataset=wenet-e2e/wenetspeech
# dataset_name=TEST_MEETING
# python3 run.py --engine_dir $output_dir \
#     --enable_warmup \
#     --dataset $dataset \
#     --dataset_name $dataset_name \
#     --dataset_split test \
#     --compute_cer \
#     --text_prefix "<|startoftranscript|><|zh|><|transcribe|><|notimestamps|>" \
#     --name aishell_${dataset_name}_${output_dir}_padding_${padding_strategy}_batch_${batch_size}_cppsession \
#     --batch_size $batch_size --padding_strategy $padding_strategy
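# --compute_cer reports character error rate (the standard metric for
# Chinese ASR), and the text_prefix pins decoding to Mandarin
# transcription without timestamps.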