Kilos1 committed on
Commit
be917bf
1 Parent(s): 4207289

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the Gradio library for creating web interfaces
2
+ import gradio as gr
3
+ # Import the pipeline module from transformers for using pre-trained models
4
+ from transformers import pipeline
5
+ # Import numpy for numerical operations
6
+ import numpy as np
7
+
8
# Load the speech-to-text pipeline once at import time so every request
# reuses the same Whisper (base, English-only) checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
10
+
11
+ # Define the transcription function that takes audio input and returns transcribed text
12
def transcribe(stream, new_chunk):
    """Append a freshly recorded audio chunk to the stream and transcribe it.

    Args:
        stream: The audio accumulated by earlier calls as a 1-D float32
            array, or ``None`` on the first call.
        new_chunk: A ``(sample_rate, samples)`` tuple. ``samples`` is
            expected to be a NumPy array, either 1-D (mono) or 2-D with
            channels on the last axis (assumed layout -- confirm against the
            audio component in use).

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio and the
        transcription of the whole stream so far. ``text`` is ``""`` when no
        audio has been collected yet.
    """
    sr, y = new_chunk

    # Down-mix multi-channel audio so the stream stays 1-D; otherwise the
    # concatenation below (and the ASR pipeline) would receive 2-D data.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() returns a copy, so the in-place scaling below never mutates
    # the caller's array.
    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Skip empty or all-silent chunks: dividing by
    # a zero peak would fill the stream with NaN/inf.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # Nothing recorded yet -- there is nothing to send to the model.
    if stream.size == 0:
        return stream, ""

    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
27
+
28
+
29
# Wire `transcribe` into a Gradio interface.
demo = gr.Interface(
    fn=transcribe,
    # First input is the "state" slot fed back from the previous call; the
    # second is a streaming microphone recorder.
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    # Outputs mirror the function's return tuple: updated state, then text.
    outputs=["state", "text"],
    live=True,
)

demo.launch()