Update app.py
app.py CHANGED
@@ -6,32 +6,43 @@ import torchaudio
 from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
 from pytube import YouTube
 from transformers import pipeline
+import re
 
-pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0)
+# pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0) # old model
+pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0) # new model with a new tokenizer
+
+replacements = [
+    ('гъ', 'ɣ'), ('дж', 'j'), ('дз', 'ӡ'), ('жь', 'ʐ'), ('кӏ', 'қ'),
+    ('кхъ', 'qҳ'), ('къ', 'q'), ('лъ', 'ɬ'), ('лӏ', 'ԯ'), ('пӏ', 'ԥ'),
+    ('тӏ', 'ҭ'), ('фӏ', 'ჶ'), ('хь', 'h'), ('хъ', 'ҳ'), ('цӏ', 'ҵ'),
+    ('щӏ', 'ɕ'), ('я', 'йа')
+]
+
+reverse_replacements = {v: k for k, v in replacements}
+reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacements))
+
+def replace_symbols_back(text):
+    return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
 
 @spaces.GPU
 def transcribe_speech(audio):
-    if audio is None:
+    if audio is None:
         return "No audio received."
-
-    return
+    transcription = pipe(audio, chunk_length_s=10)['text']
+    return replace_symbols_back(transcription)
 
 def transcribe_from_youtube(url):
     # Download audio from YouTube using pytube
-
-    audio_path = yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
+    audio_path = YouTube(url).streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
 
-    # Transcribe the downloaded audio
     transcription = transcribe_speech(audio_path)
 
-    # Clean up the downloaded file
     os.remove(audio_path)
 
     return transcription
 
 def populate_metadata(url):
-
-    return yt.thumbnail_url, yt.title
+    return YouTube(url).thumbnail_url, yt.title
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML(
@@ -49,7 +60,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     with gr.Tab("Microphone Input"):
         gr.Markdown("## Transcribe speech from microphone")
-        mic_audio = gr.Audio(
+        mic_audio = gr.Audio(source="microphone", type="filepath", label="Speak into your microphone")
         transcribe_button = gr.Button("Transcribe")
         transcription_output = gr.Textbox(label="Transcription")
 
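
For context on the replacement table added above: the retrained tokenizer appears to encode Kabardian digraphs as single placeholder symbols, and replace_symbols_back maps the model output back to standard orthography. Below is a minimal standalone sketch of that round trip; the forward helper and the example word are illustrative only and not part of the Space's code.

import re

# Same table as in app.py: Kabardian multi-character sequences -> single placeholder symbols.
replacements = [
    ('гъ', 'ɣ'), ('дж', 'j'), ('дз', 'ӡ'), ('жь', 'ʐ'), ('кӏ', 'қ'),
    ('кхъ', 'qҳ'), ('къ', 'q'), ('лъ', 'ɬ'), ('лӏ', 'ԯ'), ('пӏ', 'ԥ'),
    ('тӏ', 'ҭ'), ('фӏ', 'ჶ'), ('хь', 'h'), ('хъ', 'ҳ'), ('цӏ', 'ҵ'),
    ('щӏ', 'ɕ'), ('я', 'йа')
]

reverse_replacements = {v: k for k, v in replacements}
reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacements))

def replace_symbols_back(text):
    # Restore the original multi-character sequences in the model's output.
    return reverse_pattern.sub(lambda m: reverse_replacements[m.group(0)], text)

def replace_symbols_forward(text):
    # Illustrative inverse: longest sequences first, so 'кхъ' is handled before 'къ'/'хъ'.
    for src, dst in sorted(replacements, key=lambda p: -len(p[0])):
        text = text.replace(src, dst)
    return text

word = "щӏалэ"                            # example word containing the digraph 'щӏ'
encoded = replace_symbols_forward(word)   # 'ɕалэ', the form the model is assumed to emit
print(replace_symbols_back(encoded))      # 'щӏалэ'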
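
One note on populate_metadata: the new one-liner still reads the title from the old yt name, which is no longer defined in that scope. A small self-contained variant, relying only on pytube's documented thumbnail_url and title attributes, binds the object once:

from pytube import YouTube

def populate_metadata(url):
    # Create the YouTube object once and take both fields from it.
    yt = YouTube(url)
    return yt.thumbnail_url, yt.title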