| """ASR model configuration.""" | |
| from transformers import PretrainedConfig | |
class Config(PretrainedConfig):
    """Settings container for the ``asr-19m-v2`` speech recognition model.

    All constructor arguments are stored as same-named attributes on the
    instance. Any additional keyword arguments are passed through
    unchanged to ``PretrainedConfig.__init__``.

    Parameters
    ----------
    input_features : int
        Number of input features (mel filterbank features). Defaults to 80.
    vocab_size : int
        Size of the model vocabulary (SentencePiece tokens). Defaults to 256.
    torchscript_model_file : str
        Path to the TorchScript model file. Defaults to
        ``"asr-19m-v2-en-32b"``.
    tokenizer_file : str
        Path to the SentencePiece tokenizer model file. Defaults to
        ``"sentencepiece_256.model"``.
    sample_rate : int
        Sample rate of the audio input. Defaults to 16000.
    **kwargs
        Forwarded to the ``PretrainedConfig`` constructor.
    """

    model_type = "asr-19m-v2"

    def __init__(
        self,
        input_features=80,
        vocab_size=256,
        torchscript_model_file="asr-19m-v2-en-32b",
        tokenizer_file="sentencepiece_256.model",
        sample_rate=16000,
        **kwargs,
    ):
        # The base class consumes the generic options first, exactly as
        # before; the model-specific fields are attached afterwards.
        super().__init__(**kwargs)

        # Insertion order of this mapping is the order in which the
        # attributes are set on the instance.
        model_settings = {
            "input_features": input_features,
            "vocab_size": vocab_size,
            "torchscript_model_file": torchscript_model_file,
            "tokenizer_file": tokenizer_file,
            "sample_rate": sample_rate,
        }
        for attr_name, attr_value in model_settings.items():
            setattr(self, attr_name, attr_value)