anzorq committed on
Commit
3f40220
1 Parent(s): 6e35142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -11
app.py CHANGED
@@ -6,32 +6,43 @@ import torchaudio
6
  from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
7
  from pytube import YouTube
8
  from transformers import pipeline
 
9
 
10
- pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  @spaces.GPU
13
  def transcribe_speech(audio):
14
- if audio is None: # Handle the NoneType error for microphone input
15
  return "No audio received."
16
-
17
- return pipe(audio, chunk_length_s=10)['text']#, return_timestamps='word')
18
 
19
  def transcribe_from_youtube(url):
20
  # Download audio from YouTube using pytube
21
- yt = YouTube(url)
22
- audio_path = yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
23
 
24
- # Transcribe the downloaded audio
25
  transcription = transcribe_speech(audio_path)
26
 
27
- # Clean up the downloaded file
28
  os.remove(audio_path)
29
 
30
  return transcription
31
 
32
  def populate_metadata(url):
33
- yt = YouTube(url)
34
- return yt.thumbnail_url, yt.title
35
 
36
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
37
  gr.HTML(
@@ -49,7 +60,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
49
 
50
  with gr.Tab("Microphone Input"):
51
  gr.Markdown("## Transcribe speech from microphone")
52
- mic_audio = gr.Audio(sources="microphone", type="filepath", label="Speak into your microphone")
53
  transcribe_button = gr.Button("Transcribe")
54
  transcription_output = gr.Textbox(label="Transcription")
55
 
 
6
  from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
7
  from pytube import YouTube
8
  from transformers import pipeline
9
+ import re
10
 
11
# ASR pipeline for Kabardian (kbd) built on Wav2Vec2-BERT, pinned to GPU 0.
# NOTE(review): the commit comments said "old model" vs "new model with a new
# tokenizer", but both lines carried the identical checkpoint id — confirm the
# intended repo id; the dead commented-out duplicate has been removed.
pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0)
13
+
14
# Mapping from Kabardian digraphs/trigraphs (Cyrillic orthography) to the
# single-character stand-ins used by the model's tokenizer. Model output is
# converted back to standard orthography before being shown to the user.
replacements = [
    ('гъ', 'ɣ'), ('дж', 'j'), ('дз', 'ӡ'), ('жь', 'ʐ'), ('кӏ', 'қ'),
    ('кхъ', 'qҳ'), ('къ', 'q'), ('лъ', 'ɬ'), ('лӏ', 'ԯ'), ('пӏ', 'ԥ'),
    ('тӏ', 'ҭ'), ('фӏ', 'ჶ'), ('хь', 'h'), ('хъ', 'ҳ'), ('цӏ', 'ҵ'),
    ('щӏ', 'ɕ'), ('я', 'йа')
]

# Inverse mapping: substitute character(s) -> original orthography.
reverse_replacements = {substitute: original for original, substitute in replacements}

# Sort alternatives longest-first so a multi-character substitute (e.g. 'qҳ')
# is matched before its single-character prefix (e.g. 'q'). The original code
# relied on list insertion order for this, which was correct but fragile —
# Python's re alternation takes the leftmost alternative that matches.
reverse_pattern = re.compile(
    '|'.join(re.escape(key) for key in sorted(reverse_replacements, key=len, reverse=True))
)

def replace_symbols_back(text):
    """Convert model output back to standard Kabardian Cyrillic orthography.

    Every substitute character (or pair) is replaced with its original
    digraph/trigraph; all other characters pass through unchanged.
    """
    return reverse_pattern.sub(lambda m: reverse_replacements[m.group(0)], text)
26
 
27
@spaces.GPU
def transcribe_speech(audio):
    """Run ASR on `audio` and return Kabardian text in standard orthography.

    `audio` is whatever the Gradio Audio component hands over (a filepath);
    Gradio delivers None when the microphone produced no recording.
    """
    if audio is not None:
        result = pipe(audio, chunk_length_s=10)
        return replace_symbols_back(result['text'])
    return "No audio received."
33
 
34
def transcribe_from_youtube(url):
    """Download the audio track of a YouTube video and transcribe it.

    Returns the transcription string. The temporary download is always
    removed, even when transcription fails (the original leaked `tmp.mp4`
    if transcribe_speech raised before os.remove ran).
    """
    # Grab the first audio-only stream via pytube.
    audio_path = YouTube(url).streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
    try:
        return transcribe_speech(audio_path)
    finally:
        os.remove(audio_path)  # clean up the download on success and failure alike
43
 
44
def populate_metadata(url):
    """Return (thumbnail_url, title) for the YouTube video at `url`.

    Bug fix: the previous version inlined `YouTube(url)` for the thumbnail
    but still referenced the now-undefined local `yt` for the title,
    raising NameError on every call. Bind the object once and reuse it
    (also avoids a second network round-trip).
    """
    yt = YouTube(url)
    return yt.thumbnail_url, yt.title
 
46
 
47
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
48
  gr.HTML(
 
60
 
61
  with gr.Tab("Microphone Input"):
62
  gr.Markdown("## Transcribe speech from microphone")
63
+ mic_audio = gr.Audio(source="microphone", type="filepath", label="Speak into your microphone")
64
  transcribe_button = gr.Button("Transcribe")
65
  transcription_output = gr.Textbox(label="Transcription")
66