anzorq committed on
Commit
d1e3f48
·
verified ·
1 Parent(s): dbc9269

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -7,6 +7,7 @@ from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
7
  from pytube import YouTube
8
  from transformers import pipeline
9
  import re
 
10
 
11
  pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd-v2", device=0)
12
 
@@ -24,7 +25,16 @@ def replace_symbols_back(text):
24
  return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
25
 
26
  @spaces.GPU
27
- def transcribe_speech(stream, new_chunk):
 
 
 
 
 
 
 
 
 
28
  if new_chunk is None: # Handle the NoneType error for microphone input
29
  return "No audio received.", ""
30
 
@@ -46,7 +56,7 @@ def transcribe_from_youtube(url, progress=gr.Progress()):
46
  audio_path = YouTube(url).streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
47
 
48
  progress(0.5, "Transcribing audio...")
49
- _, transcription = transcribe_speech(None, audio_path)
50
 
51
  return audio_path, transcription
52
 
@@ -73,7 +83,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
73
  mic_audio = gr.Audio(sources='microphone', streaming=True)
74
  transcription_output = gr.Textbox(label="Transcription", lines=10)
75
 
76
- mic_audio.stream(fn=transcribe_speech, inputs=[gr.State(), mic_audio], outputs=[gr.State(), transcription_output])
 
 
 
 
 
 
 
 
77
 
78
  with gr.Tab("YouTube URL"):
79
  gr.Markdown("## Transcribe speech from YouTube video")
 
7
  from pytube import YouTube
8
  from transformers import pipeline
9
  import re
10
+ import numpy as np
11
 
12
  pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd-v2", device=0)
13
 
 
25
  return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
26
 
27
  @spaces.GPU
28
+ def transcribe_speech(audio):
29
+ if audio is None: # Handle the NoneType error for microphone input
30
+ return "No audio received."
31
+
32
+ transcription = pipe(audio, chunk_length_s=10)['text']
33
+
34
+ return replace_symbols_back(transcription)
35
+
36
+ @spaces.GPU
37
+ def transcribe_streaming(stream, new_chunk):
38
  if new_chunk is None: # Handle the NoneType error for microphone input
39
  return "No audio received.", ""
40
 
 
56
  audio_path = YouTube(url).streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
57
 
58
  progress(0.5, "Transcribing audio...")
59
+ transcription = transcribe_speech(audio_path)
60
 
61
  return audio_path, transcription
62
 
 
83
  mic_audio = gr.Audio(sources='microphone', streaming=True)
84
  transcription_output = gr.Textbox(label="Transcription", lines=10)
85
 
86
+ mic_audio.stream(fn=transcribe_streaming, inputs=[gr.State(), mic_audio], outputs=[gr.State(), transcription_output])
87
+
88
+ with gr.Tab("File Upload"):
89
+ gr.Markdown("## Transcribe speech from uploaded file")
90
+ upload_audio = gr.Audio(sources="upload", type="filepath")
91
+ transcribe_button = gr.Button("Transcribe")
92
+ file_transcription_output = gr.Textbox(label="Transcription")
93
+
94
+ transcribe_button.click(fn=transcribe_speech, inputs=upload_audio, outputs=file_transcription_output)
95
 
96
  with gr.Tab("YouTube URL"):
97
  gr.Markdown("## Transcribe speech from YouTube video")