alex buz committed
Commit 1cd886e
1 Parent(s): da77537
Files changed (7)
  1. app copy 2.py +39 -52
  2. app copy 3.py +0 -53
  3. app copy 4.py +0 -64
  4. app copy 5.py +0 -59
  5. app copy 6.py +0 -47
  6. app copy.py +14 -49
  7. requirements.txt +84 -0
app copy 2.py CHANGED
@@ -1,57 +1,44 @@
  import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return f" 111 No audio recorded or file not found: {file_info}"
-     print(file_info)
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return f"222 No audio recorded or file not found: {file_info}"

  with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record/Stop')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def manage_record(recording_state):
-         """Toggle recording and manage UI updates."""
-         return not recording_state, "Stop" if not recording_state else "Record"
-
-     state = gr.State(False)  # False indicates not recording, True indicates recording
-
-     record_btn.click(
-         fn=manage_record,
-         inputs=state,
-         outputs=[state, record_btn],
-         js="document.getElementById('audio').value = null; document.getElementById('audio').click();"
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
      )

- demo.launch(debug=True)
 
  import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     # This is a placeholder. In a real scenario, you'd have a predefined context or retrieve it based on the transcription.
+     context = "Gradio is a Python library for building machine learning web apps. It was created to make it easy for machine learning developers to demo their work."
+
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, answer_result

  with gr.Blocks() as demo:
+     gr.Markdown("# Audio Transcription and Question Answering")
+
+     audio_input = gr.Audio(label="Audio Input", sources=["microphone"])
+     transcription_output = gr.Textbox(label="Transcription")
+     answer_output = gr.Textbox(label="Answer Result")
+
+     submit_button = gr.Button("Submit")
+
+     submit_button.click(
+         fn=process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, answer_output]
      )

+ demo.launch()
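
A note on the new transcribe helper in app copy 2.py: the peak normalization y /= np.max(np.abs(y)) divides by zero on an all-silent clip, and gr.Audio can return a 2-D stereo array while the Whisper pipeline expects mono. A minimal sketch of a safer variant, not part of this commit (normalize_audio is a hypothetical helper name):

import numpy as np

def normalize_audio(y: np.ndarray) -> np.ndarray:
    """Peak-normalize a waveform, skipping the division when the clip is silent."""
    y = y.astype(np.float32)
    if y.ndim > 1:
        y = y.mean(axis=1)  # assumed downmix: stereo to mono for Whisper
    peak = np.max(np.abs(y))
    return y / peak if peak > 0 else y

Inside transcribe, the two normalization lines would then become y = normalize_audio(y).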
app copy 3.py DELETED
@@ -1,53 +0,0 @@
- import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return "No audio recorded or file not found."
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def toggle_record(button_text):
-         """Toggle the button text and manage the recording."""
-         return "Stop" if button_text == "Record" else "Record"
-
-     record_btn.click(
-         fn=toggle_record,
-         inputs=record_btn,
-         outputs=record_btn
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
-     )
-
- demo.launch(debug=True)
app copy 4.py DELETED
@@ -1,64 +0,0 @@
- import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return "No audio recorded or file not found."
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def create_toggle_record(record_btn):
-         print(111)
-         def toggle_record(button_text):
-             if button_text == "Record":
-                 print(222)
-                 print(audio_box)
-                 audio_box.start_recording()
-                 return "Stop"  # Return new button text (Stop)
-             else:
-                 audio_box.stop_recording()
-                 return "Record"  # Return new button text (Record)
-         return toggle_record
-
-     # Create the closure and connect it to the button click
-     toggle_record_fn = create_toggle_record(record_btn)
-     record_btn.click(
-         fn=toggle_record_fn,
-         inputs=[record_btn],  # Pass only the button (no need for text)
-         outputs=record_btn
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
-     )
-
- demo.launch(debug=True)
app copy 5.py DELETED
@@ -1,59 +0,0 @@
- import gradio as gr
- import time
- import speech_recognition as sr
-
- def transcribe(audio):
-     if audio is None:
-         return "No audio recorded."
-
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(audio) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def toggle_recording(audio, state):
-     if state == "Idle":
-         return None, "Recording", "Recording... Click 'Stop' when finished."
-     else:
-         time.sleep(1)  # Small delay to ensure audio is processed
-         if audio is not None:
-             transcription = transcribe(audio)
-             return None, "Idle", transcription
-         else:
-             return None, "Idle", "No audio recorded."
-
- with gr.Blocks() as demo:
-     audio = gr.Audio(sources="microphone", type="filepath", elem_id="audio-component")
-     button = gr.Button("Record", elem_id="record-button")
-     state = gr.State("Idle")
-     output = gr.Textbox(label="Transcription")
-
-     button.click(
-         fn=toggle_recording,
-         inputs=[audio, state],
-         outputs=[audio, state, output],
-         js="""
-         async (audio, state) => {
-             const audioEl = document.querySelector('#audio-component audio');
-             const recordButton = document.querySelector('#record-button');
-
-             if (state === "Idle") {
-                 await audio.startRecording();
-                 recordButton.textContent = "Stop";
-                 return [null, "Recording", "Recording... Click 'Stop' when finished."];
-             } else {
-                 await audio.stopRecording();
-                 recordButton.textContent = "Record";
-                 return [await audio.getValue(), "Idle", "Processing..."];
-             }
-         }
-         """
-     )
-
- demo.queue().launch(debug=True)
app copy 6.py DELETED
@@ -1,47 +0,0 @@
- import gradio as gr
-
- def click_js():
-     return """
-     function(audio_btn, update_status) {
-         const recordBtn = document.querySelector('#audio button');
-         if (audio_btn == 'Speak') {
-             recordBtn.click(); // Start recording
-             update_status('Stop'); // Update the button to show 'Stop'
-             return 'Recording...';
-         } else {
-             recordBtn.click(); // Stop recording
-             update_status('Speak'); // Reset button text
-             return new Promise(resolve => {
-                 setTimeout(() => { // Wait a small delay to ensure recording has stopped
-                     resolve('Done recording');
-                 }, 500);
-             });
-         }
-     }
-     """
-
- def transcribe(recording_status):
-     if recording_status == 'Done recording':
-         print('Transcribing...')
-         return 'Success'
-     else:
-         return recording_status
-
- with gr.Blocks() as demo:
-     msg = gr.Textbox()
-     audio_box = gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         audio_btn = gr.Button('Speak')
-         clear = gr.Button("Clear")
-
-     audio_btn.click(
-         js=click_js(),
-         inputs=[audio_btn],
-         outputs=[audio_btn, msg],
-         fn=transcribe
-     )
-
-     clear.click(lambda: "", inputs=None, outputs=msg, queue=False)
-
- demo.launch(debug=True)
app copy.py CHANGED
@@ -1,56 +1,21 @@
  import gradio as gr
- import speech_recognition as sr
- from pprint import pprint as pp
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         # Using Google's speech recognition service. Note: It requires internet.
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def manage_record():
-     """Toggle recording and manage UI updates."""
-     js_code = """
-     const btn = document.getElementById('record_btn');
-     const recordingText = 'Stop';
-     const idleText = 'Record';
-     if (btn.textContent.includes(idleText)) {
-         btn.textContent = recordingText;
-     } else {
-         btn.textContent = idleText;
-     }
-     """
-     return gr.update(js=js_code)
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     print(file_info)
-     file_path = file_info
-     print(file_path)
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record/Stop', elem_id='record_btn')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     record_btn.click(fn=manage_record)
-     transcribe_btn.click(fn=handle_transcription, inputs=audio_box, outputs=output_text)
-
- demo.launch(debug=True)

  import gradio as gr
+ from transformers import pipeline
+ import numpy as np

+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

+ def transcribe(audio):
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]

+ demo = gr.Interface(
+     transcribe,
+     gr.Audio(sources=["microphone"]),
+     "text",
+ )

+ demo.launch()
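
The slimmed-down app copy.py can be smoke-tested without a microphone by handing transcribe the same (sample_rate, ndarray) tuple that gr.Audio delivers with its default type="numpy". A rough sketch, assuming transcribe is in scope as defined above (the 440 Hz tone is arbitrary test input, so the resulting text is meaningless):

import numpy as np

sr = 16000  # assumed sample rate in Hz
t = np.linspace(0, 1, sr, endpoint=False)
clip = (sr, (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32))
print(transcribe(clip))  # exercises the normalization and Whisper pipeline path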
requirements.txt ADDED
@@ -0,0 +1,84 @@
+ aiofiles==23.2.1
+ altair==5.3.0
+ annotated-types==0.7.0
+ anyio==4.4.0
+ attrs==23.2.0
+ certifi==2024.7.4
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.2.1
+ cycler==0.12.1
+ dnspython==2.6.1
+ email_validator==2.2.0
+ fastapi==0.111.1
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ fonttools==4.53.1
+ fsspec==2024.6.1
+ gradio==4.29.0
+ gradio_client==0.16.1
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.5
+ idna==3.7
+ importlib_resources==6.4.0
+ intel-openmp==2021.4.0
+ Jinja2==3.1.4
+ jsonschema==4.23.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.9.1
+ mdurl==0.1.2
+ mkl==2021.4.0
+ mpmath==1.3.0
+ networkx==3.3
+ numpy==1.26.4
+ orjson==3.10.6
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.4.0
+ pydantic==2.8.2
+ pydantic_core==2.20.1
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.19.0
+ ruff==0.5.2
+ safetensors==0.4.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ SpeechRecognition==3.10.4
+ starlette==0.37.2
+ sympy==1.13.0
+ tbb==2021.13.0
+ tokenizers==0.19.1
+ tomlkit==0.12.0
+ toolz==0.12.1
+ torch==2.3.1
+ torchaudio==2.3.1
+ tqdm==4.66.4
+ transformers==4.42.4
+ typer==0.12.3
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ urllib3==2.2.2
+ uvicorn==0.30.1
+ watchfiles==0.22.0
+ websockets==11.0.3
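
The pins above cover both pipelines used by app copy 2.py (transformers==4.42.4 with torch==2.3.1; SpeechRecognition==3.10.4 stays pinned even though all speech_recognition code paths were deleted in this commit). A quick environment check, not part of the commit, that loads each model once (weights download on first run):

from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
print(qa(question="What is Gradio?",
         context="Gradio is a Python library for building machine learning web apps.")["answer"])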