Update app.py
app.py CHANGED
@@ -6,32 +6,43 @@ import torchaudio
 from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
 from pytube import YouTube
 from transformers import pipeline
+import re
 
-pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0)
+# pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0) # old model
+pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0) # new model with a new tokenizer
+
+replacements = [
+    ('гъ', 'ɣ'), ('дж', 'j'), ('дз', 'ӡ'), ('жь', 'ʐ'), ('кӏ', 'қ'),
+    ('кхъ', 'qҳ'), ('къ', 'q'), ('лъ', 'ɬ'), ('лӏ', 'ԯ'), ('пӏ', 'ԥ'),
+    ('тӏ', 'ҭ'), ('фӏ', 'ჶ'), ('хь', 'h'), ('хъ', 'ҳ'), ('цӏ', 'ҵ'),
+    ('щӏ', 'ɕ'), ('я', 'йа')
+]
+
+reverse_replacements = {v: k for k, v in replacements}
+reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacements))
+
+def replace_symbols_back(text):
+    return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
 
 @spaces.GPU
 def transcribe_speech(audio):
-    if audio is None:
+    if audio is None:
         return "No audio received."
-
-    return
+    transcription = pipe(audio, chunk_length_s=10)['text']
+    return replace_symbols_back(transcription)
 
 def transcribe_from_youtube(url):
     # Download audio from YouTube using pytube
-
-    audio_path = yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
+    audio_path = YouTube(url).streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
 
-    # Transcribe the downloaded audio
     transcription = transcribe_speech(audio_path)
 
-    # Clean up the downloaded file
     os.remove(audio_path)
 
     return transcription
 
 def populate_metadata(url):
-
-    return yt.thumbnail_url, yt.title
+    return YouTube(url).thumbnail_url, yt.title
 
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML(
@@ -49,7 +60,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     with gr.Tab("Microphone Input"):
         gr.Markdown("## Transcribe speech from microphone")
-        mic_audio = gr.Audio(
+        mic_audio = gr.Audio(source="microphone", type="filepath", label="Speak into your microphone")
         transcribe_button = gr.Button("Transcribe")
         transcription_output = gr.Textbox(label="Transcription")
 
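
For context on the replacement table added above: the retrained tokenizer appears to encode Kabardian digraphs as single placeholder symbols, and replace_symbols_back maps the model output back to standard orthography. Below is a minimal standalone sketch of that round trip; the forward helper and the example word are illustrative only and not part of the Space's code.

import re

# Same table as in app.py: Kabardian multi-character sequences -> single placeholder symbols.
replacements = [
    ('гъ', 'ɣ'), ('дж', 'j'), ('дз', 'ӡ'), ('жь', 'ʐ'), ('кӏ', 'қ'),
    ('кхъ', 'qҳ'), ('къ', 'q'), ('лъ', 'ɬ'), ('лӏ', 'ԯ'), ('пӏ', 'ԥ'),
    ('тӏ', 'ҭ'), ('фӏ', 'ჶ'), ('хь', 'h'), ('хъ', 'ҳ'), ('цӏ', 'ҵ'),
    ('щӏ', 'ɕ'), ('я', 'йа')
]

reverse_replacements = {v: k for k, v in replacements}
reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacements))

def replace_symbols_back(text):
    # Restore the original multi-character sequences in the model's output.
    return reverse_pattern.sub(lambda m: reverse_replacements[m.group(0)], text)

def replace_symbols_forward(text):
    # Illustrative inverse: longest sequences first, so 'кхъ' is handled before 'къ'/'хъ'.
    for src, dst in sorted(replacements, key=lambda p: -len(p[0])):
        text = text.replace(src, dst)
    return text

word = "щӏалэ"                            # example word containing the digraph 'щӏ'
encoded = replace_symbols_forward(word)   # 'ɕалэ', the form the model is assumed to emit
print(replace_symbols_back(encoded))      # 'щӏалэ'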
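
One note on populate_metadata: the new one-liner still reads the title from the old yt name, which is no longer defined in that scope. A small self-contained variant, relying only on pytube's documented thumbnail_url and title attributes, binds the object once:

from pytube import YouTube

def populate_metadata(url):
    # Create the YouTube object once and take both fields from it.
    yt = YouTube(url)
    return yt.thumbnail_url, yt.title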