John Langley committed on
Commit
7cbc2f4
1 Parent(s): 85533cd

Initial checkin

Browse files
Files changed (3) hide show
  1. app.py +50 -0
  2. packages.txt +1 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ from transformers.pipelines.audio_utils import ffmpeg_read
6
+
7
# Hub checkpoint loaded into the speech-recognition pipeline below.
MODEL_NAME = "openai/whisper-large-v3"
# Batch size handed to the pipeline on every transcription call.
BATCH_SIZE = 8
# NOTE(review): the two limits below are not referenced by the visible code;
# confirm whether they are still needed.
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Built once at import time and shared by every request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
20
+
21
+
22
def transcribe(inputs):
    """Transcribe an audio input with the module-level Whisper pipeline.

    Args:
        inputs: The audio to transcribe, as supplied by the Gradio audio
            component, or ``None`` when nothing was submitted.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    return result["text"]
28
+
29
+
30
# Root container that hosts the tabbed interface assembled below.
demo = gr.Blocks()

# Markdown blurb shown under the title of the microphone interface.
_MIC_DESCRIPTION = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

# Single interface: record from the microphone, get plain text back.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(sources=["microphone"], type="filepath", scale=4)],
    outputs="text",
    title="Whisper Large V3: Transcribe Audio",
    description=_MIC_DESCRIPTION,
)

with demo:
    # Only one tab is registered.
    gr.TabbedInterface([mf_transcribe], ["Microphone"])

demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ torch