reyrobs committed on
Commit
1b51978
·
1 Parent(s): a8720ca

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +61 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from pytube import YouTube
3
+
4
+ import gradio as gr
5
+ import librosa
6
+
7
+ import hopsworks
8
+
9
# Log in to Hopsworks and grab the handles used by the rest of the app.
project = hopsworks.login()
fs = project.get_feature_store()
dataset_api = project.get_dataset_api()

# Fetch the model-score plot that is shown next to every transcription.
# TODO: change link
dataset_api.download(
    "Resources/best_model/model_scores.png",
    overwrite=True,
)

# Speech-recognition pipeline backed by the fine-tuned Whisper checkpoint;
# audio is processed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="ayberkuckun/whisper-small-se-hyperparameter-searched",
    chunk_length_s=30,
)
18
+
19
+
20
def transcribe(mic=None, url=None):
    """Transcribe speech from a microphone recording or a YouTube video.

    A microphone recording takes precedence when both inputs are supplied.

    Args:
        mic: Path to the recorded audio file, or None when nothing was
            recorded.
        url: YouTube video URL. None, an empty string, or a whitespace-only
            string all mean "no URL given".

    Returns:
        A 3-tuple ``(audio, text, image)``: the audio file that was
        transcribed, the transcription (or an error message when no input
        was given), and the path of the model-score image. ``audio`` and
        ``image`` are None when no input was given.

    Raises:
        Exception: If no audio-only stream can be selected for the video.
    """
    # The default for `url` is None, so normalise before any emptiness check;
    # calling len() on None would raise TypeError.
    url = (url or "").strip()

    if mic is not None:
        return mic, pipe(mic)["text"], "model_scores.png"

    if not url:
        return None, "ERROR: You have to either use the microphone or paste a Youtube URL", None

    selected_video = YouTube(url)

    try:
        audio = selected_video.streams.filter(only_audio=True)[0]
    except Exception as err:
        # Keep the original cause attached instead of discarding it.
        raise Exception("Can't find an mp4 audio.") from err

    audio.download(filename="audio.mp3")

    # Load the downloaded audio at 16 kHz before handing it to the pipeline.
    speech_array, _ = librosa.load("audio.mp3", sr=16000)

    return "audio.mp3", pipe(speech_array)["text"], "model_scores.png"
44
+
45
+
46
# Input widgets: an optional microphone recording and an optional YouTube URL.
mic_input = gr.Audio(source="microphone", type="filepath", optional=True)
url_input = gr.Textbox(
    "https://www.youtube.com/shorts/1j2lRgtsDZk",
    label="Paste a YouTube video URL",
    optional=True,
)

# Output widgets, in the same order as the tuple returned by `transcribe`.
audio_output = gr.Audio(label="Transcripted Audio")
text_output = gr.Textbox(label="Transcription")
scores_output = gr.Image(label="Model Scores")

# Wire the widgets to the transcription function and start the demo.
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic_input, url_input],
    outputs=[audio_output, text_output, scores_output],
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
    allow_flagging="never",
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ hopsworks
2
+ librosa
3
+ gradio
4
+ git+https://github.com/huggingface/transformers
5
+ git+https://github.com/pytube/pytube
6
+ torch
7
+ sentencepiece