distil-whisper
/

distil-medium.en

Automatic Speech Recognition

Transformers.js

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

sanchit-gandhi HF staff committed on Nov 2, 2023

Commit

ba9afe1

•

1 Parent(s): c76aba4

update eval

Files changed (1) hide show

README.md +3 -2

README.md CHANGED Viewed

@@ -292,6 +292,7 @@ model_id = "distil-whisper/distil-medium.en"
 # load the model + processor
 model =  AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, use_safetensors=True, low_cpu_mem_usage=True)
 processor = AutoProcessor.from_pretrained(model_id)
 # load the dataset with streaming mode
@@ -308,7 +309,7 @@ def inference(batch):
     input_features = input_features.to(device, dtype=torch_dtype)
     # 2. Auto-regressively generate the predicted token ids
-    pred_ids = model.generate(input_features, max_new_tokens=128, language="en", task="transcribe")
     # 3. Decode the token ids to the final transcription
     batch["transcription"] = processor.batch_decode(pred_ids, skip_special_tokens=True)
@@ -336,7 +337,7 @@ print(wer)
 ```
 **Print Output:**
 ```
-2.983685535968466
 ```
 ## Intended Use

 # load the model + processor
 model =  AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, use_safetensors=True, low_cpu_mem_usage=True)
+model = model.to(device)
 processor = AutoProcessor.from_pretrained(model_id)
 # load the dataset with streaming mode
     input_features = input_features.to(device, dtype=torch_dtype)
     # 2. Auto-regressively generate the predicted token ids
+    pred_ids = model.generate(input_features, max_new_tokens=128)
     # 3. Decode the token ids to the final transcription
     batch["transcription"] = processor.batch_decode(pred_ids, skip_special_tokens=True)
 ```
 **Print Output:**
 ```
+3.593196832001168
 ```
 ## Intended Use